#!/usr/bin/perl
# testboxfolder
#
# Rewritten so that it gets the info from one EAD at a time,
# then hunts for the content in the web directory.
#
# It also uses the following rules for matching boxes and folders:
#   - If I remove the SC and leading zeros (and spaces) from box numbers both in the
#     item metadata and in that collection's EAD, the box numbers should match.
#   - If I remove anything before a hyphen or a period, and remove leading zeros from
#     box numbers both in the item metadata and in that collection's EAD, the box
#     numbers should match.
#   - If I run across "frame", treat it as a box number.
#   - If I run across "volume", treat it as a box unless there is a box; then treat it
#     as a folder.
#   - If I run across a box with no folder, or an empty folder, assume it is folder 1;
#     if I then find I have content for multiple folders for that collection, stop and
#     make a note that that EAD requires alteration (or a different script that
#     creates folders).
#
# Pulls out box and folder information if existing.
# Writes to NOTINEAD and FOUND, and also outputs the total counts found and not found.
#
# THIS DOES NOT TAKE CARE OF IMAGE OR AUDIO COLLECTIONS FROM MANUSCRIPT COLLECTIONS
#
# Jody DeRidder, 2/15/10
# Updated 3/24/10 to use the allOnePlace list of collections where all content is in a
# single box/folder in the finding aid, so no match is needed.

## Copyright (c) 2010, The University of Alabama Libraries.
## Contributed by Jody DeRidder, 7/30/10.
## All rights reserved.
## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
## * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
## * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
## the documentation and/or other materials provided with the distribution.
## * Neither the name of The University of Alabama Libraries nor the names of its contributors may be used to endorse or promote products ## derived from this software without specific prior written permission. ##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ##THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR ##CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ##PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ##LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, ##EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. $allOnePlace = "./allOnePlace"; $eadbase = "/srv/deposits/EADs/online/"; $base = "/srv/www/htdocs/content/"; open(ONE, $allOnePlace) or die "can't read $allOnePlace\n"; while ($line = ){ chomp $line; if ($line =~ /^[a-z]{1}\d{4}.*/){push (@allone, $line);} } close (ONE); $sums = ">./sums"; open(SUMS, $sums) or die "can't write to $sums\n"; $eadBoxFolders = ">EADboxFolders\n"; open(EADBF, $eadBoxFolders) or die "can't write to $eadBoxFolders\n"; $notInEADlist = ">./notInEAD"; open (NOTINEAD, $notInEADlist) or die "can't write to $notInEADlist\n"; $out = ">./found"; open (FOUND, $out) or die "can't write to $out\n"; opendir(EADS, $eadbase) or die "can't look through $eadbase\n"; while ($file = readdir(EADS)){ if ($file =~ /^(u0003\_\d{7})\.ead\.xml/){ $collnum = $1; if ($collnum eq "u0003_0000252"){ next; } # we're already doing Cabaniss undef $found; foreach $all (@allone){ if ($all eq $collnum){ $found = 1;} } if ($found){ next;} $path = $eadbase.$file; #print "looking at $file\n"; undef $foundsome; undef 
$eadLine; open(EAD, $path) or die "can't read $file\n"; while ($line = ){ if ($line =~ // || $line =~ /<\/?container/){ chomp $line; $eadLine .= $line; } } close(EAD); undef @containers; undef @boxfolders; undef @eadMatch; undef @myerrors; @containers = split("", $eadLine); foreach $con (@containers){ undef $b; undef $f; if ($con =~ /]*type="Box"[^>]*> *([^<]*?) *]*type="Folder"[^>]*> *([^<]*?) *]*type="Frame"[^>]*> *([^<]*?) *]*type="Volume"[^>]*> *([^<]*?) *){ chomp $line; $bigline .= $line; } close(MODS); if ($bigline =~ /([^<]*?) *([^<]*?) *