#!/usr/bin/perl use Time::Local; # findMissing # hunts through given set of directories (this version for the web content) # to locate what items have NO metadata # and to locate what metadata has no derivatives # jody DeRidder, 8/20/09 ## Copyright (c) 2010, The University of Alabama Libraries. ## Contributed by Jody DeRidder, 7/30/10. ## All rights reserved. ## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ## * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ## * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in ## the documentation and/or other materials provided with the distribution. ## * Neither the name of The University of Alabama Libraries nor the names of its contributors may be used to endorse or promote products ## derived from this software without specific prior written permission. ##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ##THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR ##CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ##PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ##LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, ##EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
use strict;
use warnings;

# ---------------------------------------------------------------------------
# findMissing -- main script
#
# Walks the web content tree (top directory -> collection -> item -> sub-item)
# and writes a report listing
#   * items for which no MODS metadata file can be found, and
#   * items / sub-items in which no derivative file (or subdirectory) is found.
#
# Expected layout, as checked below:
#   <indir>/<top>/<collection>/<item>/Metadata/<top>_<collection>_<item>.mods.xml
#   <indir>/<top>/<collection>/<item>/<derivative>.<ext>     (3-character extension)
#   <indir>/<top>/<collection>/<item>/<subitem>/...          (optional)
#
# The report goes to ./output/findMissing_<timestamp>.txt; the ./output
# directory must already exist.
# ---------------------------------------------------------------------------

our $timestamp;    # package global, filled in (and also returned) by timestamp()
timestamp();

my $indir = "/srv/www/htdocs/content/";    # root of the content tree; also stripped from reported names
my $out   = "./output/findMissing_" . $timestamp . ".txt";

open(my $out_fh, '>', $out) or die "can't open $out: $!\n";

# --- top level ---------------------------------------------------------------
my @topdirs;
opendir(my $top_dh, $indir) or die "can't look through $indir: $!\n";
while (defined(my $entry = readdir($top_dh))) {
    next if $entry =~ /^\./;
    push @topdirs, $indir . $entry . "/";
}
closedir($top_dh);

# --- collection level --------------------------------------------------------
my @colldirs;
for my $topdir (@topdirs) {
    opendir(my $dh, $topdir) or die "can't look through $topdir: $!\n";
    while (defined(my $entry = readdir($dh))) {
        next if _skip_entry($entry);
        push @colldirs, $topdir . $entry . "/";
    }
    closedir($dh);
}

# --- item level --------------------------------------------------------------
my @noMD;       # paths with no metadata
my @noDer;      # paths with no derivative
my @subdirs;    # sub-item directories still to be checked for derivatives

for my $colldir (@colldirs) {
    opendir(my $coll_dh, $colldir) or die "can't look through $colldir: $!\n";

    # Directory name relative to the content root, with '/' turned into '_';
    # this is the prefix used to recompose the metadata file name.
    (my $parent = $colldir) =~ s{\A\Q$indir\E}{};
    $parent =~ s{/}{_}g;

    while (defined(my $item = readdir($coll_dh))) {
        next if _skip_entry($item);

        my $item_path = $colldir . $item;    # expected to be a directory

        # Identifier of the item: <parent>_<item>, with runs of underscores
        # collapsed ($parent already ends in '_').
        (my $item_id = $parent . "_" . $item) =~ s/__+/_/g;

        # ItemDirectory/Metadata/collnum_itemnum.mods.xml
        # Only the file name is underscore-collapsed; the directory part is
        # used exactly as it exists on disk.
        my $item_mods  = $item_path . "/Metadata/" . $item_id . ".mods.xml";
        my $no_item_md = !-e $item_mods;     # also true when Metadata/ itself is absent

        opendir(my $item_dh, $item_path) or die "can't look in $item_path: $!\n";

        my $found          = 0;    # a derivative file or a subdirectory was seen
        my $has_derivative = 0;    # a derivative file sits directly in the item directory
        my $child_has_md   = 0;    # at least one child directory carries its own MODS file
        my @item_subdirs;

        while (defined(my $child = readdir($item_dh))) {
            next if _skip_entry($child);
            my $child_path = $item_path . "/" . $child;

            if (-d $child_path) {
                # Consider this "found" as the item may have subpages.
                $found = 1;
                push @item_subdirs, $child_path;

                if ($no_item_md) {
                    # No item-level metadata: test for metadata in the child.
                    my $child_mods =
                        $child_path . "/Metadata/" . $item_id . "_" . $child . ".mods.xml";
                    if (-e $child_mods) {
                        $child_has_md = 1;
                    }
                    else {
                        push @noMD, $child_path;
                    }
                }
            }
            elsif ($child =~ /.+\.\w{3}$/) {
                # Extension is 3 characters: treat as a derivative.
                $found          = 1;
                $has_derivative = 1;
            }
        }
        closedir($item_dh);

        # No item metadata and no sub-item metadata either.
        push @noMD, $item_path if $no_item_md && !$child_has_md;

        # No indication of a derivative.
        push @noDer, $item_path unless $found;

        # When the item holds its own derivative, its subdirectories (e.g.
        # transcripts of audio) need no derivatives of their own, so they are
        # not queued.  Deciding this after the scan keeps the result
        # independent of readdir order and leaves other items' subdirectories
        # untouched.
        push @subdirs, @item_subdirs unless $has_derivative;
    }
    closedir($coll_dh);
}

# --- sub-item level ----------------------------------------------------------
# Work queue: directories discovered while scanning are appended and processed
# in turn (subdirs don't have to have metadata, only derivatives).
my @queue = @subdirs;
while (@queue) {
    my $subdir = shift @queue;
    opendir(my $dh, $subdir) or die "can't look through $subdir: $!\n";

    my $found = 0;
    while (defined(my $entry = readdir($dh))) {
        next if _skip_entry($entry);
        my $path = $subdir . "/" . $entry;

        if ($entry =~ /.+\.\w{3}$/) {
            # Extension is 3 characters: derivative found, stop looking here.
            $found = 1;
            last;
        }
        elsif (-d $path) {
            # Consider this "found" as the sub item may have subpages.
            $found = 1;
            push @queue, $path;
        }
    }
    closedir($dh);

    # No indication of a derivative: report the directory itself.
    push @noDer, $subdir unless $found;
}

# --- report ------------------------------------------------------------------
my $problem = 0;

if (@noMD) {
    $problem = 1;
    _report_section($out_fh, "********* MISSING METADATA *************", $indir, \@noMD);
}

if (@noDer) {
    $problem = 1;
    _report_section($out_fh, "********* MISSING DERIVATIVES *************", $indir, \@noDer);
}

if ($problem) {
    print "\n\nCorrections are needed. \n\nPlease check the output file for results: $out\n\n";
}
else {
    print "Everything looks great!!! Thank you. No problems found.\n\n";
    print {$out_fh} "Everything looks great!!! Thank you. No problems found.\n\n";
}

close($out_fh) or die "can't close $out: $!\n";
exit;

# _skip_entry($name)
# True for directory entries that are never inspected: dot entries and
# anything whose name contains "Transcripts" or "Metadata".
sub _skip_entry {
    my ($name) = @_;
    return 1 if $name =~ /^\./;
    return 1 if $name =~ /Transcripts/;
    return 1 if $name =~ /Metadata/;
    return 0;
}

# _report_section($fh, $heading, $root, \@paths)
# Prints $heading, then every path in sorted order with the leading $root
# removed and '/' replaced by '_', followed by two blank lines.
sub _report_section {
    my ($fh, $heading, $root, $paths) = @_;
    print {$fh} $heading . "\n";
    for my $path (sort @{$paths}) {
        (my $name = $path) =~ s{\A\Q$root\E}{};
        $name =~ s{/}{_}g;
        print {$fh} " $name\n";
    }
    print {$fh} "\n\n";
    return;
}

# timestamp()
# Sets the package global $timestamp to the current UTC time formatted as
# YYYY-MM-DDThh:mm:ssZ and returns that string.
sub timestamp {
    #following for Windows
    #print "hit enter twice please\n";
    #$date = `date`;
    #$time = `time`;
    #print $date."\n";
    #if ($date =~ /.*? (\d*)\/(\d*)\/(\d*)/){
    #   $date = $3.$1.$2;
    #   }
    #print $date."\n";
    #if ($time =~ /.*? (\d*)\:(\d*)\:(\d*)\./){
    #   $time = $1.$2.$3;
    #   }
    #print $time."\n";
    #$timestamp = $date."T".$time;
    #print $timestamp."\n";

    # following for unix
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime();

    # gmtime() gives a 0-based month and a year counted from 1900; every field
    # except the year is zero-padded to two digits.
    $timestamp = sprintf(
        "%d-%02d-%02dT%02d:%02d:%02dZ",
        $year + 1900, $mon + 1, $mday, $hour, $min, $sec
    );
    return $timestamp;
}