#!/usr/bin/perl
use strict;
use warnings;

use File::Compare;
use File::Copy;
use File::Path qw(make_path);
use DBI;

# relocatingEads
# jody DeRidder, 11/3/09
# this version to copy eads
# into archival storage system
#BEFORE THIS: run eadToDbase in /srv/scripts/eads to add to database, and
# getead in /srv/scripts/surfacing/ to put it in Acumen live
# WARNING! Before running this, do a chmod -R 755 /archive
# after running it, do chmod -R 555 /archive
# this will enable you to write to the directories as root --
# and then close off that ability afterwards.
# after writing this, add links/manifests as needed on top level of /srv/www/htdocs/lockss/ directories
# (should be indicated by this script's output)
# check links at http://libcontent1.lib.ua.edu/lockss/Manifest.html
# then run checkem in this directory -- it will verify md5 sums of content that was moved,
# and delete from the deposits directory.
# then go look in the deposits directory, make sure folders are clean, and delete them.

## Copyright (c) 2010, The University of Alabama Libraries.
## Contributed by Jody DeRidder, 7/30/10.
## All rights reserved.
## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
## * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
## * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
## the documentation and/or other materials provided with the distribution.
## * Neither the name of The University of Alabama Libraries nor the names of its contributors may be used to endorse or promote products
## derived from this software without specific prior written permission.
##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
##THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
##CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
##PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
##LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
##EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# NOTE(review): this file reached review with its line breaks collapsed and
# every angle-bracketed token stripped (readline operators, HTML inside string
# literals, the element name in the title regex).  Anything marked
# "NOTE(review)" below is a reconstruction and must be checked against the
# deployed copy of the script before it is run against the archive.

# Dry-run switch.  When true, nothing is copied and no manifest is rewritten;
# everything that would have gone into a manifest is appended to the file
# RelocateManifests in the current directory instead.
my $test = 0;
#$test = 1;
#$icon = "http://content.lib.ua.edu/cdm4/images/mss.icon.png";

# location hardcoded here:
# alter this to just the new ones
my $inbase   = "/srv/deposits/EADs/new/";
my $outbase  = "/srv/archive/";
my $linkbase = "http://libcontent1.lib.ua.edu/lockss";
my $moveme   = "./moveme";

my $hostname = "localhost";
my $port     = "3306";
my $user     = "user";
my $password = "password";
my $database = "InfoTrack";

my $dbh = DBI->connect("DBI:mysql:$database:$hostname:$port", $user, $password)
    or die "can't connect to database: ", $DBI::errstr, "\n";
# The original set these flags on an undefined $h, so they never reached the
# database handle.
$dbh->{PrintError} = 1;
$dbh->{RaiseError} = 1;

# Parent manifest path => { collection number => title }.
# %makeMan holds parents that do not exist yet, %alterMan parents that do.
my %makeMan;
my %alterMan;
my @thesetoo;    # newly needed parents that must be linked from the top level

# Handle that currently receives manifest text: the per-collection manifest,
# then parentMans, or RelocateManifests for the whole run in dry-run mode.
# Left undefined whenever nothing is open so write_out() can skip silently.
my $out_fh;

# Log of every "source destination" pair handed to copythis().
open(my $move_fh, '>', $moveme) or die "can't add to $moveme\n";

# COMMENT OUT and uncomment manifest line below when ready to write manifests
if ($test) {
    open($out_fh, '>>', 'RelocateManifests')
        or die "can't write to RelocateManifests \n";
}

opendir(my $base_dh, $inbase) or die "can't open $inbase\n";

FILE:
while (defined(my $file = readdir($base_dh))) {
    next FILE if $file =~ /^\./;    # skip dot files
    # print "looking at $file\n";
    my $thisone = $inbase . $file;

    # get the title please
    my $title = read_title($thisone);
    if (!$title) {
        die "can't find a title for $thisone\n";
    }
    # NOTE(review): the source showed a line break inside this literal between
    # the filename and the title; treated as an extraction artifact.
    print "$file $title\n";

    # Only names of the form <letter><4 digits>_<7 digits>... are relocated.
    next FILE unless $file =~ /^([a-z]{1}[\d]{4})\_([\d]{7})/;
    my ($topdir, $secdir) = ($1, $2);
    my $collnum  = $topdir . "_" . $secdir;
    my $collbase = $outbase . $topdir . "/" . $secdir . "/";
    my $upperman = $outbase . $topdir . "/Documentation/Manifest.html";

    # Register this collection with its parent manifest.  The title is stored
    # right away: the original stored " " and then patched it through rvalue
    # lookups on BOTH hashes, which autovivified an empty entry in the other
    # hash and later made the alter pass die on a parent that does not exist.
    if (-e $upperman) {
        # need to alter this manifest to link in this collection
        print "ALTER MANIFEST: add $collnum to $upperman\n";
        $alterMan{$upperman}{$collnum} = $title;
    }
    else {
        # need to create this manifest
        print "CREATE MANIFEST: add $collnum to $upperman\n";
        $makeMan{$upperman}{$collnum} = $title;
    }

    my $docdir   = $collbase . "Documentation";
    my $newMDdir = $collbase . "Metadata";
    for my $dir ($collbase, $docdir, $newMDdir) {
        make_path($dir) unless -e $dir;    # was: `mkdir -p $dir` via the shell
    }

    my $old = $thisone;
    my $new = $newMDdir . "/" . $file;

    # Decide whether there is anything to do BEFORE the manifest is opened for
    # writing.  The original skipped an unchanged file only after it had
    # already truncated the manifest and written its leading part, which threw
    # away the remainder of an existing manifest.
    if (-e $new) {
        my $differs = compare($old, $new);    # 0 same, 1 different, -1 error
        die "can't compare $old with $new: $!\n" if $differs < 0;
        if ($differs) {
            print "versioning! an older version of $file already exists!\n";
        }
        else {
            print "$new does not differ from $old. Not copying or linking.\n";
            next FILE;
        }
    }

    # print "$collnum --> $title\n";
    my $manifest = $docdir . "/Manifest.html";
    my $newbie   = !-e $manifest;    # true when a new manifest is being written
    my @before;    # existing manifest up to and including the
                   # "Collection Level Metadata" heading; the EAD link goes next
    my @after;     # the remainder of the existing manifest
    my $tail;      # closing part of a newly written manifest

    # the only manifest not in this pattern is u0003_0000252, cabaniss, it's old.
    if (!$newbie) {
        # we don't want to overwrite an existing manifest: split it around the
        # heading, then write it back with the new link in between.
        open(my $man_fh, '<', $manifest) or die "can't read in $manifest\n";
        my $found;
        while (my $m = <$man_fh>) {
            if ($found) {
                push @after, $m;
                next;
            }
            # The heading line is kept exactly once (the original pushed it
            # twice, duplicating the heading on every rewrite).
            push @before, $m;
            $found = 1 if $m =~ /Collection Level Metadata *<\/h3>/;
        }
        close($man_fh);
        if (!$found) {
            # NOTE(review): as in the original, the link is still written (at
            # the very end of the file) even though this message says it was not.
            print "ERROR! No collection level metadata heading in $manifest. DID NOT LINK!! \n";
        }

        # now, start to rewrite it:
        if (!$test) {
            open($out_fh, '>', $manifest) or die "can't write to $manifest\n";
        }
        write_out(@before);
    }
    else {
        # this writes a new manifest
        if (!$test) {
            if (!open($out_fh, '>', $manifest)) {
                warn "can't open $manifest to write.\n";
                undef $out_fh;    # later writes are dropped, as before
            }
        }
        # NOTE(review): all markup in $head and $tail was stripped from the
        # source; only the visible text and the </h3> after "Collection Level
        # Metadata" are known.  The <ul> shares a line with that heading so the
        # split logic above inserts later links inside the list.
        my $head = '<html><head><title>' . $title . ' Manifest Page</title></head>
<body>
<h1>' . $title . ' ' . $collnum . ' Manifest Page</h1>
<h3>Administrative Information</h3>
<ul>
</ul>
<h3>Collection Level Metadata</h3><ul>
';
        $tail = '</ul>
<h3>Metadata</h3>
<ul>
</ul>
<h3>Transcripts</h3>
<ul>
</ul>
<h3>Content</h3>
<ul>
</ul>
<p>LOCKSS system has permission to collect, preserve, and serve this Archival Unit</p>
</body>
</html>
';
        write_out($head);
    }

    # overwrite unversioned copy with new version (never linked)
    copythis($old, $new, $file, 0);

    # keep incrementing version # till you find one that doesn't exist
    my $version = 1;
    my $v       = versioned_name($file, $version);
    my $newv    = $newMDdir . "/" . $v;
    while (-e $newv) {
        $version++;
        $v = versioned_name($file, $version);
        print "version will be $v\n";
        $newv = $newMDdir . "/" . $v;
    }
    copythis($old, $newv, $v, 1);    # the versioned copy is the one linked

    if ($newbie) {
        write_out($tail);
    }
    else {
        write_out(@after);    # prints remainder of existing manifest
    }
    if (!$test && defined $out_fh) {
        close($out_fh);
        undef $out_fh;
    }
}    # end of the directory
closedir($base_dh);    # was close(BASE), which does not close a directory handle

if (!$test) {
    open($out_fh, '>', 'parentMans') or die "can't write to parentMans\n";
}

# manifest time
my @makethese  = keys %makeMan;
my @alterthese = keys %alterMan;

# CHANGE THIS TO SUIT WHAT IS NEEDED
# NOTE(review): markup in $mhead and $mtail was stripped from the source and is
# reconstructed around the surviving text.  $mhead is not used anywhere in the
# code that reached review; it is kept because the comment above marks it as
# site configuration.  $mtail must start with the </ul> that the alter pass
# below stops reading at.
my $mhead = '<html><head><title>University of Alabama Hoole Special Collections Manucripts u0003 Manifest Page</title></head>
<body>
<h1>University of Alabama </h1>
<ul>
';
my $mtail = '</ul>
<p>LOCKSS system has permission to collect, preserve, and serve this Archival Unit</p>
</body>
</html>
';

if (@makethese) {
    print "\n\nNEED TO CREATE MANIFESTS: \n";
    foreach my $m (@makethese) {
        if (-e $m) { next; }    #print "WHY MAKE $m? IT EXISTS!\n"; next;}
        print " $m \n";
        my %kids = %{ $makeMan{$m} };
        foreach my $c (sort keys %kids) {
            (my $p = $c) =~ s,\_,\/,g;    # underscores become forward slashes for path
            my $mylink = $linkbase . "/" . $p . "/Documentation/Manifest.html";
            # NOTE(review): <li>/<a> markup reconstructed.
            print "<li><a href=\"$mylink\">" . $kids{$c} . " $c Manifest</a></li>\n";
        }
        print "\n";
        # Was `!$m =~ /\d{7}/`, which negates $m before matching and so never
        # matched; the intent is "this path has no 7-digit directory".
        if ($m !~ /\d{7}/) { push(@thesetoo, $m); }
    }
}

if (@alterthese) {
    print "\n\nADDING LINK TO MANIFEST FROM LEVEL ABOVE: \n";
    foreach my $parent (@alterthese) {
        if (!$parent) { next; }
        my @gotthese;    # collection numbers already linked from this manifest
        my @rewrite;     # manifest lines up to the end of the link list
        print " $parent \n";
        open(my $aman_in, '<', $parent) or die "can't read $parent\n";
        while (my $line = <$aman_in>) {
            if ($line =~ /<\/ul>/) {    # end of list of manifest links
                last;
            }
            # NOTE(review): the leading <li> is reconstructed from a bullet artifact.
            if ($line =~ /<li>.* ([a-z]{1}\d{4}\_\d{7})(\_\d{7})? /) {
                push(@gotthese, $1 . (defined $2 ? $2 : ''));
            }
            push(@rewrite, $line);
        }
        close($aman_in);

        my %kids = %{ $alterMan{$parent} };
        foreach my $c (sort keys %kids) {
            (my $p = $c) =~ s,\_,\/,g;    # underscores become forward slashes for path
            # print "$p is a child collection of $parent. Is this correct?\n";
            my $mylink = $linkbase . "/" . $p . "/Documentation/Manifest.html";
            my $found;
            foreach my $g (@gotthese) {
                if ($g eq $c) { $found = 1; }
            }
            if (!$found) {
                push(@rewrite, "<li><a href=\"$mylink\">" . $kids{$c} . " $c Manifest</a></li>\n");
            }
            else {
                # was "$g", which is no longer set once the loop above has ended
                print "found $c in $parent already!!!\n";
            }
        }

        if (!$test) {
            # Everything from </ul> onward in the old file is replaced by $mtail.
            open(my $aman_out, '>', $parent) or die "can't write to $parent\n";
            print {$aman_out} @rewrite, $mtail;
            close($aman_out) or die "can't finish writing $parent\n";
        }
        if (@rewrite) {
            write_out("\n\n -- --- ALTERED $parent -----\n\n", @rewrite, $mtail);
            print "\n";
        }
    }
}
close($out_fh) if defined $out_fh;
close($move_fh);

my @sorted = sort(@thesetoo);
if (@sorted) {
    print "\n\nALTER TOP LEVEL MANIFEST TO INCLUDE THESE!\n";
    foreach (@sorted) { print " $_\n"; }
}
$dbh->disconnect();
exit;

# Return the collection title found in the EAD file at $path, or undef.
# The last matching line wins; leading "Guide to (the)" wording, including the
# misspellings seen in the data, is removed.  Dies if the file cannot be read.
sub read_title {
    my ($path) = @_;
    open(my $in_fh, '<', $path) or die "can't read $path\n";
    my $title;
    while (my $line = <$in_fh>) {
        chomp $line;
        # NOTE(review): the element name was stripped from the source (the
        # regex survived as /(.*)/i); <titleproper> is an assumption -- confirm.
        if ($line =~ /<titleproper[^>]*>(.*)<\/titleproper>/i) {
            $title = $1;
            $title =~ s,guide to the ,,i;
            $title =~ s,gudie to the ,,i;
            $title =~ s,guide tot he ,,i;
            $title =~ s,guide to ,,i;
        }
    }
    close($in_fh);
    return $title;
}

# Build the versioned form of $file: ".v<version>" goes in front of a trailing
# three-letter lowercase extension (name.xml -> name.v1.xml).  When there is no
# such extension the suffix is appended instead, so the caller's "find a free
# version" loop always makes progress.
sub versioned_name {
    my ($file, $version) = @_;
    my $name = $file;
    $name =~ s/(\.[a-z]{3})$/.v$version$1/ or $name .= ".v$version";
    return $name;
}

# Print @chunks to whichever manifest/report handle is currently open; does
# nothing when none is (for example after a failed, warned-about open).
sub write_out {
    my (@chunks) = @_;
    return unless defined $out_fh;
    print {$out_fh} @chunks;
    return;
}

# old = file to copy and where
# new = what to name it and where
# file = filename alone with extension
# ready is if we want to link it or not -- 0 means no, 1 means yes
# if level is present -- last parameter -- what level of Manifest to link it into...
# see ETD script for this
#
# Copies $old to $new when $new does not exist yet, or unconditionally when
# $ready is 0 (the unversioned metadata copy may be overwritten).  Every copy
# is logged to the moveme file; in dry-run mode it is logged but not performed.
# When $ready is true a link line for $file is written to the current manifest.
sub copythis {
    my ($old, $new, $file, $ready) = @_;
    (my $whichdir = $new) =~ s,\/srv\/archive,,;    # oops, don't want this in link
    if (!-e $new || $ready == 0) {    # allow overwrite of metadata which is not version
        print {$move_fh} "$old $new\n";
        if (!$test) {
            copy($old, $new) or die "cannot copy $old to $new\n";
        }
        if ($ready) {
            # do NOT link in metadata files yet, except METS and MODS for etds
            my $mylink = $linkbase . $whichdir;
            # NOTE(review): <li>/<a> markup reconstructed.
            write_out("\t<li><a href=\"$mylink\">$file</a></li>\n");
        }
    }
    # else{ print "WARNING! $new exists, NOT overwriting with $old\n";}
    # note, if there is more than one jpg, png or gif, only the first found is saved
    # else { print "NOT SAVING $collnum admin folder $file\n";}
    return;
}