#!/usr/bin/perl # countByBox # goes through Cabaniss directory in archive and in web directories # gives a count by box of digital files # and a total for each set # jody DeRidder # 1/6/10 ## Copyright (c) 2010, The University of Alabama Libraries. ## Contributed by Jody DeRidder, 6/10/10. ## All rights reserved. ## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ## * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ## * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in ## the documentation and/or other materials provided with the distribution. ## * Neither the name of The University of Alabama Libraries nor the names of its contributors may be used to endorse or promote products ## derived from this software without specific prior written permission. ##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ##THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR ##CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ##PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ##LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, ##EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
use strict;
use warnings;

# Roots of the two copies of collection u0003_0000252 that are compared.
# Both end in "/" because paths below are built by plain concatenation.
my $stored = "/srv/archive/u0003/0000252/";
my $web    = "/srv/www/htdocs/content/u0003/0000252/";

# Per-box tallies keyed by the two-digit box number, one hash per copy.
my %archive;
my %webcount;

# list_entries($dir)
#   Returns the names found in $dir, minus anything starting with a dot
#   (".", ".." and hidden files).  Dies if the directory cannot be opened.
sub list_entries {
    my ($dir) = @_;

    opendir(my $dh, $dir) or die "can't look through $dir\n";
    # List-context readdir returns every entry at once, so an entry that is
    # literally named "0" cannot cut the scan short the way a bare
    # while ($file = readdir ...) test can.
    my @names = grep { !/^\./ } readdir($dh);
    # Directory handles must be released with closedir; close() has no
    # effect on them.
    closedir($dh);

    return @names;
}

# count_by_box($root, $wanted, $skip, $counts)
#   Walks every directory below $root and counts the files whose name
#   matches $wanted, grouped by box number.
#
#   $root   - directory to scan, with trailing "/".
#   $wanted - compiled regex selecting the files that are counted.
#   $skip   - compiled regex for names to ignore entirely (files and
#             directories alike), or undef to ignore nothing.
#   $counts - hash reference; $counts->{box} is incremented per file.
#
#   Returns the total number of files counted.  A wanted file whose name
#   does not carry a box number is reported on STDOUT and not counted.
#   Dies if any directory cannot be opened.
sub count_by_box {
    my ($root, $wanted, $skip, $counts) = @_;
    my $total = 0;

    # Only directories directly under the root seed the scan; plain files
    # at the top level are ignored.  Names containing "000000" are the
    # previously digitized items and are left out.
    my @queue;
    for my $name (list_entries($root)) {
        next if $name =~ /000000/;
        my $path = $root . $name;
        push @queue, $path . "/" if -d $path;
    }

    # An explicit work queue replaces pushing onto an array while a foreach
    # is iterating over it; visiting order is unchanged (first found, first
    # scanned).
    while (defined(my $dir = shift @queue)) {
        for my $name (list_entries($dir)) {
            next if defined $skip && $name =~ $skip;

            my $path = $dir . $name;
            if (-d $path) {
                push @queue, $path . "/";
            }
            elsif ($name =~ $wanted) {
                # The box number is the two digits that follow the
                # collection prefix; five more digits must come after it.
                if ($name =~ /u0003_0000252_(\d{2})\d{5}/) {
                    $counts->{$1} += 1;
                    $total += 1;
                }
                else {
                    print "ERROR in filename! $name in $path\n";
                }
            }
        }
    }

    return $total;
}

# Archive copy: every .tif file counts.
my $archiveTotal = count_by_box($stored, qr/\.tif$/, undef, \%archive);

# Web copy: only the _2048.jpg derivative counts.  Metadata/transcript
# files (.xml, .txt) and the smaller _512/_128 derivatives are skipped.
my $webTotal = count_by_box(
    $web,
    qr/_2048\.jpg$/,
    qr/\.xml|\.txt|_512\.jpg$|_128\.jpg$/,
    \%webcount,
);

print "Total files in archive: $archiveTotal; on the web: $webTotal\n\n";
print "In the archive:\n";

# Report on every box seen in either copy, so a box that exists only on the
# web is listed (and flagged) as well as one that exists only in the archive.
my %seen;
my @boxes = sort grep { !$seen{$_}++ } (keys %archive, keys %webcount);

print "BOX ARCHIVE WEB\n________________\n";

my @errors;
foreach my $box (@boxes) {
    # A box missing from one side has no hash entry; treat that as zero.
    my $in_archive = $archive{$box}  || 0;
    my $on_web     = $webcount{$box} || 0;

    print "$box " . $in_archive . " " . $on_web . "\n";
    push @errors, $box if $in_archive != $on_web;
}

if (@errors) {
    print "\n\nRUN findMissingFile script if the following boxes\nare in both Acumen and archive\n\n";
    foreach my $box (@errors) {
        print "Box $box has a discrepancy\n";
    }
}