#!/usr/bin/perl
use File::Copy;
use DBI;

# relocatingBd
# Jody DeRidder, 10/21/09
# this version to reorder born digital content from Metadata Unit
# into archival storage system

# this will check to see if collection info is already in the database.
# if it is, will use that info for Manifests -- if not, will insert it from xml file
# and if xml in admin folder does not exist, will die with an error.
# expects file to be of this form:  http://intranet.lib.ua.edu/wiki/digcoll/index.php/Collection_Information
#
#
#
#
#
#
#
#
#
#
# corresponding locations for this in analogColls:
#   filename (minus extension)      -> id_2009
#   Digital_Collection_Name         -> title
#   Alphabetized_By                 -> alphaBy
#   Type_Of_Content                 -> type
#   Analog_Collection_Name          -> sourceCollName
#   Manuscript_Number               -> mssNum
#   Finding_Aid_Link                -> mssUrl
#   Digital_Collection_Description  -> blurb
# additional info that should be figured out and entered:
#   cannedLink (based on type if specific icon does not exist)
#   iconLocation
#   online (1 or 0)

# WARNING!  Before running this, do a chmod -R 755 /archive
# after running it, do chmod -R 555 /archive
# this will enable you to write to the directories as root --
# and then close off that ability afterwards.

# after writing this, add links/manifests as needed on top level of /srv/www/htdocs/lockss/ directories
# (should be indicated by this script's output)
# check links at http://libcontent1.lib.ua.edu/lockss/Manifest.html
# then run checkem in this directory -- it will verify md5 sums of content that was moved,
# and delete from the deposits directory.
# then go look in the deposits directory, make sure folders are clean, and delete them.

# Note that this EXPECTS scans in a Scans directory!!  not on top level.
# rewrite this to pick up only from deposits, and to avoid overwrites!!

## Copyright (c) 2010, The University of Alabama Libraries.
## Contributed by Jody DeRidder, 7/30/10.
## All rights reserved.
## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
##    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
##    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
##      the documentation and/or other materials provided with the distribution.
##    * Neither the name of The University of Alabama Libraries nor the names of its contributors may be used to endorse or promote products
##      derived from this software without specific prior written permission.
##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
##THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
##CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
##PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
##LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
##EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#$test = 1;

# ---------------------------------------------------------------------
# Main script body.
#
# Walks every entry of $inbase whose name starts with a collection
# number (one lowercase letter + 4 digits, "_", 7 digits) and, for each:
#   * derives the archive paths under $outbase and creates the
#     collection, Documentation and Metadata directories if missing;
#   * collects the sub-directories whose name contains "Content"
#     into @scandirs;
#   * finds the collection title, either from Admin/<collnum>.xml
#     (or <collnum>.1.xml .. <collnum>.9.xml) or, when no such file
#     exists, from the digColls table; dies if no title is found;
#   * either splits an existing Documentation/Manifest.html into
#     @level1 .. @level6 so it can be rewritten, or starts a new one.
#
# When $test is set, manifest output is appended to ./RelocateManifests
# instead of being written to the real Manifest.html files.
# ---------------------------------------------------------------------

# location hardcoded here:
$inbase = "/srv/deposits/bornDigital/";
$outbase = "/srv/archive/";
$linkbase = "http://libcontent1.lib.ua.edu/lockss";

$database = "InfoTrack";
$hostname = "localhost";
$port = "3306";
$user = "user";
$password = "password";

$dbh = DBI->connect("DBI:mysql:$database:$hostname:$port", $user, $password) or die "can't connect to database: ",$DBI::errstr,"\n";
# These attributes belong on the database handle; they were previously
# set on an unrelated, undefined $h and so never took effect.
$dbh->{PrintError} = 1;
$dbh->{RaiseError} = 1;
$D = $dbh->quote("D"); # digital

$moveme = "./moveme";
open (MOVE, ">", $moveme) or die "can't add to $moveme\n";

# COMMENT OUT and uncomment manifest line below when ready to write manifests
if ($test){ open (OUT, ">>", "RelocateManifests") or die "can't write to RelocateManifests \n";}

opendir (BASE, $inbase) or die "can't open $inbase\n";
while ($file = readdir(BASE)){
    if ($file =~ /^\./ ){ next; } # skip dot files
    # print "looking at $file\n";
    if ($file =~ /^([a-z]{1}[\d]{4})\_([\d]{7})/){
        $topdir = $1;
        $secdir = $2;
        $collnum = $topdir."_".$secdir;
        $collbase = $outbase.$topdir."/".$secdir."/";
        $upperman = $outbase.$topdir."/Documentation/Manifest.html";
        # The " " placeholder is true, so the "add titles" step below can
        # tell which entries were registered during this pass.
        if (-e $upperman){ # need to alter this manifest to link in this collection
            print "ALTER MANIFEST: add $collnum to $upperman\n";
            ${$alterMan{$upperman}}{$topdir."_".$secdir} = " "; # add title later -- is there one?
        }
        else{ # need to create this manifest
            print "CREATE MANIFEST: add $collnum to $upperman\n";
            # HERE following doesn't work correctly -- lists things that exist.
            ${$makeMan{$upperman}}{$topdir."_".$secdir} =" "; # add title later -- is there one?
        }
        $olddir = $inbase.$file."/";
        if (! -e $collbase){ `mkdir -p $collbase`; }

        # we will look here for png, jpg, gif, txt, rtf files
        # rename the image and text files for the collection number, and
        # store under "Documentation" at the collection level
        $admindir = $inbase.$file."/Admin";
        # print "old admin dir for $file is $admindir\n";
        $docdir = $collbase."Documentation";
        $oldMDdir = $inbase.$file."/Metadata/";
        $newMDdir = $collbase."Metadata";
        $oldTrans = $inbase.$file."/Transcripts/";
        if (! -e $docdir){ `mkdir -p $docdir`; }
        if (! -e $newMDdir){ `mkdir -p $newMDdir`; }

        # we also need to find scans directories.  What are they?
        # NOTE(review): this loop reuses the outer loop's $file, which is
        # therefore undef once it finishes; every path that needs the
        # deposit folder name has already been built above.
        undef @scandirs;
        opendir(COLL, $olddir) or die "can't look in $olddir\n";
        while ($file = readdir(COLL)){
            if ($file =~ /^\./){ next;} # no dot directories
            if ($file =~ /.*?Content.*?/i){
                $adir = $olddir.$file;
                if ( -d $adir){ # make sure this is a directory
                    push (@scandirs, $adir);
                }
            }
        }
        closedir(COLL); # COLL is a directory handle, so closedir (not close)

        # we need to look for $base.$file."/Admin/$collnum\.xml
        # to extract the title for the collection
        # NOTE!!  this does NOT pick up collnum.2.xml, collnum.3.xml, etc.
        # if this collection is another from the same analog collection, there may be different numbers
        $collinfo = $admindir."/".$collnum.".xml";
        print "looking for $collinfo\n";
        $found = 1;
        if (! (-e $collinfo)){
            undef $found;
            # fall back to the first of <collnum>.1.xml .. <collnum>.9.xml that exists
            for ($i=1; $i < 10; $i++){
                $testme = "$admindir/$collnum.$i.xml";
                if (-e $testme){
                    $collinfo = $testme;
                    $found = 1;
                    last; # was "$last;", a no-op that let the loop keep scanning
                }
            }
        }

        # we need a copy of the collection file to create/alter manifests, with each dump.
        if (! $found){
            # no XML file: take the title from the database instead
            # NOTE(review): "_" in the collection number is a wildcard under
            # LIKE; confirm whether an exact "=" match was intended.
            $id = $dbh->quote($collnum);
            $sth = $dbh->prepare("select title from digColls where id_2009 like $id and AnalogOrDigital like $D") or die "can't prepare select for $collnum to see if it's up: ", $dbh->errstr(),"\n";
            $sth->execute() or die "can't select to see if $collnum is up: ", $dbh->errstr(),"\n";
            ($title) = $sth->fetchrow_array();
            warn "Problem in fetchrow_array(): ",$sth->errstr(),"\n" if $sth->err();
            $sth->finish();
        }
        else{
            open (INFO, "<", $collinfo) or die "can't open $collinfo\n";
            undef $xml;
            undef $title;
            while ($line = <INFO>){
                $line =~ s,\r,,g; # no Windows newlines
                $line =~ s,\. \"(\s),\.\"$1,g; # no space between period and quote
                if ($line =~ /Digital\_Collection\_Name/){
                    # keep the line's text with the opening/closing element tags removed
                    ( $title = $line) =~ s,\<\/?Digital\_Collection\_Name\>,,g;
                    chomp $title;
                }
            }
            close (INFO);
        }
        # The message used to interpolate the (empty) $title itself.
        if (!$title){ die "ERROR, no title from $collinfo\n";}

        # add titles for manifests
        if (${$makeMan{$upperman}}{$collnum}){ ${$makeMan{$upperman}}{$collnum} = $title;}
        if (${$alterMan{$upperman}}{$collnum}){ ${$alterMan{$upperman}}{$collnum} = $title;}

        $manifest = $docdir."/Manifest.html";
        $newbie = 1; # if this value exists, it's a new manifest being written
        # the only manifest not in this pattern is u0003_0000252, cabaniss, it's old.
        if (-e $manifest){
            # we don't want to overwrite an existing manifest.
            # how do I add to one that exists????  HERE
            undef $newbie;
            open (MAN, "<", $manifest) or die "can't read in $manifest\n";
            # note that we may not need ALL these levels, but we need to support this many, just in case
            undef @level1; # end just after "Administrative Information" -- add more here.
            undef @level2; # end before transcripts or item level metadata; contains collection metadata
            undef @level3; # end before item level metadata; contains transcripts
            undef @level4; #end before content; contains item level metadata
            undef @level5; # end before last
            undef @level6; # last lines
            undef $itemMDExists;
            undef $transcriptsExist;
            $level = 1;
            # Each heading line is stored at the end of the section it closes
            # and switches $level; every other line goes to the current section.
            while ($m = <MAN>){
                if ($m =~ /Administrative Information *<\/h3>/){
                    push (@level1, $m);
                    $level = 2;
                }
                elsif ($m =~ /Collection Level Metadata *<\/h3>/){
                    push (@level2, $m);
                    $level = 3;
                }
                elsif ($m =~ /Metadata *<\/h3>/){
                    push (@level3, $m); # add these headers later
                    $itemMDExists = 1;
                    $level = 4;
                }
                elsif ($m =~ /Content *<\/h3>/){
                    push (@level4, $m); # skip level 5;
                    $level = 6;
                }
                elsif ($level == 1){ push(@level1, $m);}
                elsif ($level == 2){ push(@level2, $m);}
                elsif ($level == 3){ push(@level3, $m);}
                elsif ($level == 4){ push(@level4, $m);}
                elsif ($level == 6){ push(@level6, $m);}
            }
            close(MAN);
            # now, start to rewrite it:
            if (! $test){ open (OUT, ">", $manifest) or die "can't write to $manifest\n"; }
            foreach (@level1){ print OUT $_;}
        }
        else{
            if (! $test){ open(OUT, ">", $manifest) or warn "can't open $manifest to write.\n";}
            # NOTE(review): the two literals below look as if their HTML markup
            # has been lost; they are kept exactly as found -- confirm against
            # a known-good copy of this script.
            $head = ' '.$title.' Manifest Page

'.$title.' '.$collnum.' Manifest Page

';
            $tail = '

LOCKSS system has permission to collect, preserve, and serve this Archival Unit

';
            print OUT $head;
        }

        # now, move other content from the admin folder
        opendir(ADMIN, $admindir) or die "can't open $admindir\n";
        if ($newbie){ print OUT " \n"; }
        else{
            foreach (@level2){ print OUT $_;}
            # this prints the rest of the existing Admin data, and starts the
            # collection level metadata section
        }

        # next, metadata
        if (-e $oldMDdir){
            undef $metsdir;
            opendir(MD, $oldMDdir) or die "can't open $oldMDdir\n";
            # if ($newbie){ print OUT "