#!/usr/bin/perl use File::Copy; use DBI; # relocateCabaniss # jody DeRidder, 6/11/09 ## Copyright (c) 2010, The University of Alabama Libraries. ## Contributed by Jody DeRidder, 7/30/10. ## All rights reserved. ## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ## * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ## * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in ## the documentation and/or other materials provided with the distribution. ## * Neither the name of The University of Alabama Libraries nor the names of its contributors may be used to endorse or promote products ## derived from this software without specific prior written permission. ##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ##THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR ##CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ##PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ##LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, ##EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# This version reorders content from the share drive
# into the archival storage system.
# First, do a diff from /srv/deposits/content to the share drive area
# to see if we got the content ok; then delete it off the share.
# Then go to the collstuff directory ../collstuff and run collToDbase
# to put the collection info in the database.
# MODIFY to add links for online in Tonio's software.
# WARNING! Before running this, do a chmod -R 755 /archive;
# after running it, do chmod -R 555 /archive.
# This will enable you to write to the directories as root --
# and then close off that ability afterwards.
# After this script has written the content, add links/manifests as needed on the top level
# of the /srv/www/htdocs/lockss/ directories (should be indicated by this script's output).
# Check links at http://libcontent1.lib.ua.edu/lockss/Manifest.html
# Then run checkem in this directory -- it will verify md5 sums of content that was moved,
# and delete from the deposits directory.
# Then go look in the deposits directory, make sure folders are clean, and delete them.
# Note that this EXPECTS scans in a Scans directory!! Not on the top level.
# TODO: rewrite this to pick up only from deposits, and to avoid overwrites!!
# ---------------------------------------------------------------------------
# Main script and copythis helper -- review documentation.
#
# What the code below does:
#   * Opens $inbase and looks at every entry whose name matches
#     <letter><4 digits>_<7 digits>, optionally followed by _<7 digits>.
#     The pieces become $topdir / $secdir / $thirdir, and $collbase is built
#     from them under $outbase.
#   * Records in %alterMan or %makeMan (keyed by the parent
#     Documentation/Manifest.html path) whether that parent manifest already
#     exists and therefore needs altering or creating.
#   * Creates $collbase, its Documentation directory and its Metadata
#     directory with `mkdir -p` when they do not exist.
#   * Collects every sub-directory of the deposit whose name matches /Scan/i.
#   * Finds the collection title: from the line containing
#     Digital_Collection_Name in Admin/<collnum>.xml (or <collnum>.1.xml ..
#     <collnum>.9.xml); if no such file exists, from the allColls table;
#     dies if neither source yields one.
#   * If the collection Manifest.html exists, reads it into @level1..@level6,
#     split at its section headings, and clears $newbie; otherwise starts a
#     new manifest.
#   * Copies metadata, METS, transcript and scan files into the archive tree
#     through copythis, which logs every "old new" pair to the MOVE handle
#     (the file named by $moveme).
#   * After the directory walk, prints the parent manifests that need to be
#     created or altered, and rewrites the altered ones when $test is unset.
#
# $test: when set, copythis skips the actual copy and OUT is opened once on
#   RelocateManifests (append); when unset, OUT is opened on each collection
#   manifest and finally on parentMans.
#
# copythis($old, $new, $file, $ready, $level)   (defined at the end)
#   $old   - path of the file to copy
#   $new   - destination path
#   $file  - file name used as the link text
#   $ready - 0: copy without linking (overwrite allowed);
#            non-zero: copy only if $new does not exist, then add a manifest
#            entry
#   $level - 4 pushes the entry onto @level4, 3 onto @level3; anything else
#            prints it straight to OUT
#   When $new exists and $ready is non-zero it only prints a WARNING.
#
# NOTE(review): this text looks damaged by an HTML/whitespace extraction --
#   many statements and comments share one physical line, reads such as
#   `while ($line = ){` have no filehandle (presumably <INFO>, <MAN>, <AVER>,
#   <DVER>, <EAD>, <AMAN>), most helper calls appear as `©this` (presumably
#   `&copythis`), markup inside the manifest strings and some regexes is
#   missing, and part of the metadata loop seems to be gone. Restore from the
#   original file before running -- TODO confirm against the real source.
# NOTE(review): $h->{PrintError} / $h->{RaiseError} are set on $h, which is
#   never assigned in the visible code; presumably $dbh was intended.
# NOTE(review): `$last;` in the Admin xml search only evaluates a scalar; it
#   does not leave the for loop (presumably `last;` was intended).
# NOTE(review): `if (!$m =~ /\d{7}/)` negates $m before matching, because `!`
#   binds tighter than `=~`; presumably `$m !~ /\d{7}/` was intended.
# NOTE(review): COLL, MD, METS and TRANS are opened with opendir but passed
#   to close(); SD and ADMIN are never closed in the visible code.
# NOTE(review): copythis unpacks @_ into the package globals $old, $new,
#   $file, $ready and $level, so every call overwrites the caller's $file
#   and $level; the inner readdir loops also reuse the outer $file.
# NOTE(review): copythis strips the literal /srv/archive from the link path
#   instead of using $outbase.
# NOTE(review): the `die "ERROR, no $title ..."` message interpolates $title,
#   which is empty whenever that branch is taken.
# ---------------------------------------------------------------------------
#$test = 1; # location hardcoded here: $inbase = "/srv/deposits/Cabaniss/"; $outbase = "/srv/archive/"; $linkbase = "http://libcontent1.lib.ua.edu/lockss"; $moveme = "./moveme"; $hostname = "localhost"; $port = "3306"; $user = "user"; $password = "password"; $database = "InfoTrack"; #$oaibase = "oai:content.lib.ua.edu:$m/"; # add dnum $dbh = DBI->connect("DBI:mysql:$database:$hostname:$port", $user, $password) or die "can't connect to database: ",$DBI::errstr,"\n"; $h->{PrintError} = 1; $h->{RaiseError} = 1; open (MOVE, ">".$moveme) or die "can't add to $moveme\n"; # COMMENT OUT and uncomment manifest line below when ready to write manifests if ($test){ open (OUT, ">>RelocateManifests") or die "can't write to RelocateManifests \n";} opendir (BASE, $inbase) or die "can't open $inbase\n"; while ($file = readdir(BASE)){ if ($file =~ /^\./ ){ next; } # skip dot files # print "looking at $file\n"; undef $thirdir; undef $secdir; undef $topdir; if ($file =~ /^([a-z]{1}[\d]{4})\_([\d]{7})(\_)?([\d]{7})?/){ $topdir = $1; $secdir = $2; $thirdir = $4; if (! $thirdir){ $collnum = $topdir."_".$secdir; $collbase = $outbase.$topdir."/".$secdir."/"; } else{ $collnum = $topdir."_".$secdir."_".$thirdir; $collbase = $outbase.$topdir."/".$secdir."/".$thirdir."/"; } $upperman = $outbase.$topdir."/Documentation/Manifest.html"; if (-e $upperman){ # need to alter this manifest to link in this collection print "ALTER MANIFEST: add $collnum to $upperman\n"; ${$alterMan{$upperman}}{$topdir."_".$secdir} = " "; # add title later -- is there one? } else{ # need to create this manifest print "CREATE MANIFEST: add $collnum to $upperman\n"; ${$makeMan{$upperman}}{$topdir."_".$secdir} =" "; # add title later -- is there one? } $olddir = $inbase.$file."/"; if (! 
-e $collbase){ `mkdir -p $collbase`; } # we will look here for png, jpg, gif, txt, rtf files # rename the image and text files for the collection number, and # store under "Documentation" at the collection level $admindir = $inbase.$file."/Admin"; # print "old admin dir for $file is $admindir\n"; $docdir = $collbase."Documentation"; $oldMDdir = $inbase.$file."/Metadata/"; $newMDdir = $collbase."Metadata"; $oldTrans = $inbase.$file."/Transcripts/"; if (! -e $docdir){ `mkdir -p $docdir`; } if (! -e $newMDdir){ `mkdir -p $newMDdir`; } # we also need to find scans directories. What are they? undef @scandirs; opendir(COLL, $olddir) or die "can't look in $olddir\n"; while ($file = readdir(COLL)){ if ($file =~ /^\./){ next;} # no dot directories if ($file =~ /.*?Scan.*?/i){ $adir = $olddir.$file; if ( -d $adir){ # make sure this is a directory push (@scandirs, $adir); } } } close(COLL); # we need to look for $base.$file."/Admin/$collnum\.xml # to extract the title for the collection # NOTE!! this does NOT pick up collnum.2.xml, collnum.3.xml, etc. # if this collection is another from the same analog collection, there may be different numbers $collinfo = $admindir."/".$collnum.".xml"; print "looking for $collinfo\n"; $found = 1; if (! (-e $collinfo)){ undef $found; for ($i=1; $i < 10; $i++){ $testme = "$admindir/$collnum.$i.xml"; if (-e $testme){ $collinfo = $testme; $found = 1; $last; } } # we need a copy of the collection file to create/alter manifests, with each dump. if (! 
$found){ undef $title; print "No admin xml file for $collnum; looking in database\n"; $id = $dbh->quote($collnum); $d = $dbh->quote("D"); $sth = $dbh->prepare("select title from allColls where id_2009 like $id and AnalogOrDigital like $d") or die "can't prepare select for $collnum to see if it's up: ", $dbh->errstr(),"\n"; $sth->execute() or die "can't select to see if $collnum is up: ", $dbh->errstr(),"\n"; ($title) = $sth->fetchrow_array(); warn "Problem in fetchrow_array(): ",$sth->errstr(),"\n" if $sth->err(); if ($title){ print "$title is already in the database\n"; $sth->finish(); goto THISPART; } else{ $sth->finish(); die "not in database, and no xml file in $admindir\n"; } } } # HERE!! CHeck to see if each file begins with: # # # and ends with # if NOT, add them in ## --- ALL THIS IS IN ANOTHER SCRIPT NOW -- just need the title open (INFO, $collinfo) or die "can't open $collinfo\n"; undef $xml; undef $parentstart; undef $parentend; undef $title; undef @thisfile; while ($line = ){ $line =~ s,\r,,g; # no Windows newlines $line =~ s,\. 
\"(\s),\.\"$1,g; # no space between period and quote # try to repair MS word encodings of hyphens, quotes, apostrophes $line =~ s,\342\200\231,',g; # if you hexdump the file, in place of an apostrophe # you will see in the word line: 342 200 231 # hexdump -cox filename > output # gives octal, hex, and characters # or you can just hexdump -c and look for those goofy things $line =~ s,\342\200\230,',g; $line =~ s,\342\200\235,",g; $line =~ s,\342\200\234,",g; $line =~ s,\342\200\233,\-\-,g; $line =~ s,\342\200\224,\-\-,g; $line =~ s,\342\200\223,\-\-,g; $line =~ s,\342\200\246,\-,g; $line =~ s,\357\277\275,\',g; $line =~ s,\222,\',g; # shows up as <92> $line =~ s,\226,\-,g; # shows up as <96> $line =~ s,> *"(.*)" *<,>\1<,; # try to remove extraneous quotes $line =~ s, \& , \&\; ,g; #encode ampersand if ($line =~ /Digital\_Collection\_Name/){ ( $title = $line) =~ s,\<\/?Digital\_Collection\_Name\>,,g; chomp $title; } } close (INFO); THISPART: if (!$title){ die "ERROR, no $title from $collinfo\n";} # add titles for manifests if (${$makeMan{$upperman}}{$collnum}){ ${$makeMan{$upperman}}{$collnum} = $title;} if (${$alterMan{$upperman}}{$collnum}){ ${$alterMan{$upperman}}{$collnum} = $title;} # print "$collnum --> $title\n"; $manifest = $docdir."/Manifest.html"; $newbie = 1; # if this value exists, it's a new manifest being written # the only manifest not in this pattern is u0003_0000252, cabaniss, it's old. if (-e $manifest){ # we don't want to overwrite an existing manifest. # how do I add to one that exists???? HERE undef $newbie; open (MAN, $manifest) or die "can't read in $manifest\n"; # note that we may not need ALL these levels, but we need to support this many, just in case undef @level1; # end just after "Administrative Information" -- add more here. 
# The @level1..@level6 arrays reset and filled here buffer the lines of the existing manifest, split at its section headings, so they can be written back out with new entries added.
undef @level2; # end before transcripts or item level metadata; contains collection metadata undef @level3; # end before item level metadata; contains transcripts undef @level4; #end before content; contains item level metadata undef @level5; # end before last undef @level6; # last lines undef $itemMDexists; undef $transcriptsExist; $level = 1; while ($m = ){ if ($m =~ /Administrative Information *<\/h3>/){ push (@level1, $m); $level = 2; } elsif ($m =~ /Collection Level Metadata *<\/h3>/){ push (@level2, $m); $level = 3; } elsif ($m =~ /Metadata *<\/h3>/){ # push (@level3, $m); # add these headers later $itemMDexists = 1; $level = 4; } elsif ($m =~ /Transcripts *<\/h3>/){ $transcriptsExist = 1; $level = 5; } elsif ($m =~ /Content *<\/h3>/){ $level = 6; } elsif ($level == 1){ push(@level1, $m);} elsif ($level == 2){ push(@level2, $m);} elsif ($level == 3){ push(@level3, $m);} elsif ($level == 4){ push(@level4, $m);} elsif ($level == 5){ push(@level5, $m);} elsif ($level == 6){ push(@level6, $m);} } close(MAN); # now, start to rewrite it: if (! $test){ open (OUT, ">".$manifest) or die "can't write to $manifest\n"; } foreach (@level1){ print OUT $_;} } else{ if (! $test){ open(OUT, ">".$manifest) or warn "can't open $manifest to write.\n";} $head = ' '.$title.' Manifest Page '; $tail = '

LOCKSS system has permission to collect, preserve, and serve this Archival Unit

'; print OUT $head; } # now, move other content from the admin folder opendir(ADMIN, $admindir) or warn "can't open $admindir\n"; if ($newbie){ print OUT " \n"; } else{ foreach (@level2){ print OUT $_;} # this prints the rest of the existing Admin data, and starts the # collection level metadata section } # next, metadata if (-e $oldMDdir){ undef $metsdir; undef $didOne; opendir(MD, $oldMDdir) or die "can't open $oldMDdir\n"; if ($newbie){ print OUT " off the array for level 4 if there's already item-level metadata from earlier if ($itemMDexists && ! $didOne){ $closer = pop(@level4); $didOne = 1; print "Removed $closer from level 4 array\n";} ($dirpath = $myplace) =~ s,\_,\/,g; # trade underscores for slashes $thisMDdir = $collbase.$dirpath."/Metadata"; if (! -e $thisMDdir){ `mkdir -p $thisMDdir`;} $newfile = $file; $thisMDdir =~ s,\/\/,\/,g; print "item level mods: $file going to $thisMDdir as $newfile\n"; $old = $oldMDdir."/".$file; $new = $thisMDdir."/".$newfile; undef $val; if (-e $new){ $val = `diff $old $new`; # print "Diff value -->$val<-- for $path versus $new\n"; } if ($val){ # check file dates. 
# The two read loops that follow keep the largest recordCreationDate / recordChangeDate value seen in the archive copy ($dateAver) and the deposit copy ($dateDver). NOTE(review): the comparison uses numeric > -- confirm the date strings compare correctly as numbers.
undef $dateAver; # archive version undef $dateDver; # deposit version open (AVER, $new) or die "can't look in $new\n"; # this will get the last recordChange date listed, if there is one while ($line = ){ if ($line =~ /]*> *([^<]*) *<\/recordCreationDate>/){ $thisdate = $1; if (!$dateAver){ $dateAver = $thisdate;} elsif ($thisdate > $dateAver){ $dateAver = $thisdate;} } elsif ($line =~ /]*> *([^<]*) *<\/recordChangeDate>/){ $thisdate = $1; if (!$dateAver){ $dateAver = $thisdate;} elsif ($thisdate > $dateAver){ $dateAver = $thisdate;} } } close(AVER); open (DVER, $old) or die "can't look in $old\n"; while ($line = ){ if ($line =~ /]*> *([^<]*) *<\/recordCreationDate>/){ $thisdate = $1; if (!$dateDver){ $dateDver = $thisdate;} elsif ($thisdate > $dateDver){ $dateDver = $thisdate;} } elsif ($line =~ /]*> *([^<]*) *<\/recordChangeDate>/){ $thisdate = $1; if (!$dateDver){ $dateDver = $thisdate;} elsif ($thisdate > $dateDver){ $dateDver = $thisdate;} push (@adates, $1); } } close(DVER); print "comparing deposit date of $dateDver and archive date of $dateAver\n"; if ($dateDver > $dateAver){ print "Deposited version of $file is newer than the archive version!\n";} else{ print "Deposited version of $file is NOT newer than the archive version\n"; undef $val;} } if ((! -e $new) || ($val)){ # print "$new already exists! 
overwriting and versioning\n"; ©this ($old, $new, $file, 0); # not linking these right now $version = 1; ($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension $newv = $thisMDdir."/".$v; while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist $version ++; ($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension # print "version will be $v\n"; $newv = $thisMDdir."/".$v; } if ($myplace){ # item-level metadata # print "at this point, myplace = $myplace\n"; ©this ($old, $newv, $v, 1, 4); # last says put this on level 4 of manifest } else{ # collection level ©this ($old, $newv, $v, 1, 3); } } } # end if MODS elsif ($file =~ /$collnum\_\d{7}((\_\d{4})?(\_\d{3})?)/){ $myplace = $1; ($dirpath = $myplace) =~ s,\_,\/,g; # trade underscores for slashes $thisMDdir = $collbase.$dirpath."/Metadata/"; if (! -e $thisMDdir){ `mkdir -p $thisMDdir`;} $newfile = $collnum.$myplace.$ext; print "ERROR!! item level metadata??: $file going to $thisMDdir as $newfile\n"; $old = $oldMDdir."/".$file; $new = $thisMDdir."/".$newfile; # print MOVE "$oldMDdir/$file $newMDdir/$newfile\n"; ©this ($old, $new, $file, 0); # not linking these right now } # watch out for eads!! elsif ($file =~ /($collnum\.ead)\.xml/){ $myname = $1; # print MOVE "$oldMDdir/$file $newMDdir/$file\n"; if (! -e $newMDdir){ `mkdir -p $newMDdir`;} $newfile = $myname.$ext; $old = $oldMDdir."/".$file; $new = $newMDdir."/".$newfile; # check eadid open (EAD, $old) or die "can't read $old\n"; undef @myead; undef $found; while ($line = ){ if ($line =~ / *<\/eadid>/){ $line = " $collnum\n"; $found = 1; } push (@myead, $line); } close(EAD); if ($found){ # need to repair the file open (EAD, ">".$old) or die "can't write to $old\n"; foreach (@myead){ print EAD $_;} close(EAD); } if (-e $new){ print "ERROR! 
$newfile already exists!\n";} else{ ©this ($old, $new, $file, 0); # not linking these right now } $version = 1; ($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension $newv = $newMDdir."/".$v; while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist $version ++; ($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension print "version will be $v\n"; $newv = $newMDdir."/".$v; } ©this ($old, $newv, $v, 1); } elsif ($file =~ /$collnum(\.\d{1,2})?/){ # if .1.txt $added = $1; # print MOVE "$oldMDdir/$file $newMDdir/$file\n"; if (! -e $newMDdir){ `mkdir -p $newMDdir`;} $newfile = $collnum.$added.$ext; # retains added number $old = $oldMDdir."/".$file; $new = $newMDdir."/".$newfile; if (-e $new){ print "ERROR! $newfile already exists!\n";} else{ ©this ($old, $new, $newfile, 0); # linking these right now } $version = 1; ($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension $newv = $newMDdir."/".$v; while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist $version ++; ($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension print "version will be $v\n"; $newv = $newMDdir."/".$v; } ©this ($old, $newv, $v, 1); } else{ #renaming it as collection metadata. Beware! if more than one file... extra versions $newfile = $collnum.$ext; #descriptiveMetadata.txt becomes u0003_0000002.txt $old = $oldMDdir."/".$file; $new = $newMDdir."/".$newfile; if (-e $new){ print "ERROR! 
$newfile already exists!\n";} else{ ©this ($old, $new, $newfile, 0);} if ($newfile ne $file){print "just renamed $file to $newfile, but not linking it\n";} else{ # we want to link a version of this $version = 1; ($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension $newv = $newMDdir."/".$v; while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist $version ++; ($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension print "version will be $v\n"; $newv = $newMDdir."/".$v; } ©this ($old, $newv, $v, 1); } } } elsif ($file =~ /^METS/){ $metsdir = $oldMDdir.$file; } else{ print "NOT SAVING $collnum metadata $file\n"; } } close (MD); if (@level3){ if ($newbie){ print OUT "

    Collection Level Metadata

    \n"; } foreach (@level3){ print OUT $_;} # prints existing coll-level metadata -- then start of next section if ($newbie){print OUT "
\n";} # above should include if ($itemMDexists){ print OUT "

    Metadata

    \n";} } if ($metsdir){ # SECTION FOR METS FILES opendir(METS, $metsdir) or die "can't open $metsdir\n"; if ((! $itemMDexists) || ($newbie)){ print OUT "

      Metadata

      \n";} while ($m = readdir (METS)){ if ($m =~/^(.*)\.mets\.xml/){ $id = $1; $version = 1; $v = $id.".mets.v$version.xml"; if ($id =~ /^([a-z]{1}[\d]{4})\_([\d]{7})\_([\d]{7})(\_[\d]{4}(\_[\d]{3})?)?$/){ if (($1 ne $topdir || $2 ne $secdir) || (($thirdir) && $3 ne $thirdir)){ print "FILENAME ERROR: $id in $metsdir\n"; next; } @parts = split ("_", $id); $old = $metsdir."/".$m; $where = join("/",@parts); # create directory structure needed $mydir = $outbase.$where."/Metadata"; # print MOVE "$metsdir/$m $mydir/$m\n"; if (! -e $mydir){ `mkdir -p $mydir`;} # make my directory if it does not exist $newv = $mydir."/".$v; while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist $version ++; $v = $id.".mets.v$version.xml"; print "version will be $v\n"; $newv = $mydir."/".$v; } ©this ($old, $newv, $v, 1, 4); $newme = $mydir."/".$m; ©this ($old, $newme, $m, 0); $newme = $mydir.$m; } } } close (METS); } # end of if METS dir } # end of old MD directory if (@level4){ print "I have item level metadata!\n"; if ($newbie || (! $itemMDexists)){ print OUT "

        Metadata

        \n";} foreach (@level4){ print OUT $_;} if ($newbie || ( $itemMDexists)){ print OUT "
      \n";} } else{ print "NO ITEM LEVEL METADATA!\n";} if ($transcriptsExist){ print OUT "

        Transcripts

        \n"; } # if, in the scans directory, there is a Transcriptions subdirectory, # we need to consider these as items for storage. undef @transdirs; if (-e $oldTrans){ if (($newbie) || (!$transcriptsExist)){ print OUT "

          Transcripts

          \n";} push (@transdirs, $oldTrans); foreach $tr (@transdirs){ opendir(TRANS, $tr) or die "can't open $tr\n"; while ($file = readdir(TRANS)){ if ($file =~ /^\./){ next;} # skip dot files # print "found $file\n"; $this = $tr."/".$file; if (-d $this){ if ($this =~ /\d{4}\_\d{7}/ || $this =~ /OCR/){ # a directory containing compound push (@transdirs, $this); next; } } else{ # not a directory # note: this ignores pdfs or other derivatives! if ($file =~ /^(.*)(\.tif)$/ || $file =~ /^(.*)(\.txt)$/ || $file =~ /^(.*)(\.xml)$/ ){ #TEST FORM OF FILENAME!! $id = $1; $ext = $2; undef $one; undef $two; undef $three; undef $four; undef $five; if ($id =~ /^([a-z]{1}[\d]{4})\_([\d]{7})\_([\d]{7})(\_[\d]{4})(\_[\d]{3})?/ || $id =~ /^([a-z]{1}[\d]{4})\_([\d]{7})\_([\d]{7})(\_[\d]{4})?/){ $one = $1; $two = $2; $three = $3; $four = $4; $five = $5; if ($five){ $where = $one."/".$two."/".$three."/".$four."/".$five."/Transcripts";} elsif($four){ $where = $one."/".$two."/".$three."/".$four."/Transcripts";} elsif($three){ $where = $one."/".$two."/".$three."/Transcripts";} elsif($two){$where = $one."/".$two."/Transcripts";} else{$where = $one."/Transcripts";} if (($one ne $topdir || $two ne $secdir) || (($thirdir) && $three ne $thirdir)){ print "FILENAME ERROR: $id in $tr\n"; next; } $where =~ s,\_,,g; $old = $tr."/".$file; $path = $outbase."/".$where; $path =~ s,\/\/+,\/,g; # remove excess / # $path =~ s/$collbase/$collbase\/Transcripts\//; # put in the transcripts directory # print "Is this the correct transcripts directory for $file? \n$path\nY or N\n\n"; # $answ = ; # if ($answ =~ /n/i){ #TRANS: print "give me the correct path, please: \n\n"; # $answ = ; # chomp ($answ); # print "Is this the correct transcripts directory for $file? \n$path\nY or N\n\n"; # $answ = ; # if ($answ =~ /n/i){ goto TRANS;} # else{ $path = $answ;} # } if ( ! 
-e $path){ `mkdir -p $path`;} $new = $path."/".$file; if ($ext ne ".tif"){ # no versions on tifs and wav files ©this ($old, $new, $file, 0); $version = 1; ($v = $file) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension $newv = $path."/".$v; while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist $version ++; ($v = $file) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension print "version will be $v\n"; $newv = $path."/".$v; } # old file, new file, what to link, filename with extension, 1= linkit ©this ($old, $newv, $v, 1); # linking in version # not linking unversion } else{ # if a tiff, and it's not already there, link the actual thing, no versioning if (-e $new){ print "ERROR! $new already exists!\n";} else{ ©this ($old, $new, $file, 1); # linking these right now } } } else{ print "FILENAME ERROR: $id in $tr\n";} } else { # text files? xlsx? OCR? Transcripts? collection level or item level metadata? if ($file =~ /Thumbs.db/){ next;} print "OTHER FILES IN TRANSCRIPTS: $file in $tr\n"; } } # end of not a directory in transcripts }# end of this transcript directory close(TRANS); } if (($newbie) || (!$transcriptsExist)){ print OUT "
        \n";} } # end if there's transcripts foreach (@level5){ print OUT $_;} # no level5 if no existing transcripts # if we find OCR, we need to do something similar: Text/OCR # we need to find all scans directories, and there may be several, with various # addendums such as Scans_check1, Scans_Online, RandomScans print OUT "

          Content

          \n"; # assuming everything has content foreach $sd (@scandirs){ opendir (SD, $sd) or die "can't open $sd\n"; while ($file = readdir(SD)){ if ($file =~ /^\./){ next;} # skip dot files # print "found $file\n"; $this = $sd."/".$file; if (-d $this){ if ($this =~ /\d{4}\_\d{7}/){ # a directory containing compound push (@scandirs, $this); next; } elsif ( $file =~ /Transcript/i){ # put these in transcript directory on item level? print "TRANSCRIPTS found in $sd\n"; } elsif ($file =~ /OCR/i){ print "OCR found in $sd\n"; # put these in OCR directory on item level? } else{ print "UNKNOWN DIR: $this\n";} } else{ # not a directory # note: this ignores pdfs or other derivatives! if ($file =~ /^(.*)(\.tif)$/ || $file =~ /^(.*)(\.wav)$/){ #TEST FORM OF FILENAME!! $id = $1; $ext = $2; if ($id =~ /^([a-z]{1}[\d]{4})\_([\d]{7})\_([\d]{7})(\_[\d]{4}(\_[\d]{3})?)?$/){ if (($1 ne $topdir || $2 ne $secdir) || (($thirdir) && $3 ne $thirdir)){ print "FILENAME ERROR: $id in $sd\n"; next; } @parts = split ("_", $id); $old = $sd."/".$file; $where = join("/",@parts); # create directory structure needed $path = $outbase.$where; if ( ! -e $path){ `mkdir -p $path`;} $new = $path."/".$file; #NO!! NO versioning of tiffs and wav files. Too much room!! # $version = 1; # add new version and link it; do not overwrite old file # while (-e $new){ # $file = $id.".v".$version.$ext; # $new = $path."/".$file; # } copythis ($old, $new, $file, 1); } else{ print "FILENAME ERROR: $id in $sd\n";} } else { # text files? xlsx? OCR? Transcripts? collection level or item level metadata? if ($file =~ /Thumbs.db/){ next;} print "OTHER FILES IN SCANS: $file in $sd\n"; } } # end of not a directory in scans }# end of this scan directory } # end of scan directories if ($newbie){ print OUT "
        \n"; print OUT $tail; } else{ foreach (@level6){ print OUT $_;}} if (!$test){close OUT;} } # end of whether the file found matches the pattern } # end of the directory close(BASE); if (! $test){ open (OUT, ">parentMans") or die "can't write to parentMans\n";} # manifest time @makethese = keys (%makeMan); @alterthese = keys (%alterMan); # CHANGE THIS TO SUIT WHAT IS NEEDED $mhead = ' University of Alabama Hoole Special Collections Manucripts u0003 Manifest Page

        University of Alabama '; $mtail = '

      LOCKSS system has permission to collect, preserve, and serve this Archival Unit

      '; if (@makethese){ print "\n\nNEED TO CREATE MANIFESTS: \n"; foreach $m (@makethese){ if (-e $m){ print "WHY MAKE $m? IT EXISTS!\n"; next;} print " $m \n"; %kids = %{$makeMan{$m}}; @colls = sort keys (%kids); foreach $c (@colls){ ($p = $c) =~ s,\_,\/,g; # underscores become forward slashes for path $mylink = $linkbase."/".$p."/Documentation/Manifest.html"; # while (($c,$t) = each (%kids)){ print "
    • ".$kids{$c}." $c Manifest
    • \n"; } print "\n"; if (!$m =~ /\d{7}/){ push (@thesetoo, $m); } } } if (@alterthese){ print "\n\nNEED TO ALTER MANIFESTS: \n"; foreach $a (@alterthese){ if (! $a){ next;} undef @gotthese; undef @rewrite; print " $a \n"; open (AMAN, $a) or die "can't read $a\n"; while ($line = ){ if ($line =~ /<\/ul>/){ close (AMAN); next; } if ($line =~ /
    • .* ([a-z]{1}\d{4}\_\d{7})(\_\d{7})? /){ push (@gotthese, $1.$2); } push (@rewrite, $line); } close (AMAN); %kids = %{$alterMan{$a}}; @colls = sort keys (%kids); foreach $c (@colls){ ($p = $c) =~ s,\_,\/,g; # underscores become forward slashes for path $mylink = $linkbase."/".$p."/Documentation/Manifest.html"; undef $found; foreach $g (@gotthese){ if ($g eq $c){ $found = 1;} } if (! $found){ push (@rewrite, "
    • ".$kids{$c}." $c Manifest
    • \n"); print "
    • ".$kids{$c}." $c Manifest
    • \n"; } else { print "found $g in $a already!!!\n";} # print " $c\t".$kids{$c}\n"; #while (($c,$t) = each (%kids)){ #print " $c\t$t\n"; } if (! $test){ open (AMAN, ">".$a) or die "can't write to $a\n"; foreach (@rewrite){ print AMAN $_;} print AMAN $mtail; close(AMAN); } print OUT "\n\n -- --- ALTERED -----\n\n"; foreach (@rewrite){ print OUT $_;} print OUT $mtail; print "\n"; } } close (OUT); @sorted = sort (@thesetoo); if (@sorted){ print "\n\nALTER TOP LEVEL MANIFEST TO INCLUDE THESE!\n"; foreach (@sorted){ print " $_\n";} } $dbh->disconnect(); exit; # old = file to copy and where # new = what to name it and where # file = filename alone with extension # ready is if we want to link it or not -- 0 means no, 1 means yes # if level is present -- last parameter -- what level of Manifest to link it into... sub copythis{ ($old, $new, $file, $ready, $level) = @_; # print "NEW: $new\nFILE: $file\n"; ($whichdir = $new) =~ s,\/srv\/archive,,; # oops, don't want this in link $whichdir =~ s,Metadata\/\/,Metadata\/,; # print "DIR: $whichdir\n"; if (! -e $new || $ready == 0){ # allow overwrite of metadata which is not version print MOVE "$old $new\n"; if (! $test){ copy ($old, $new) or die "cannot copy $old to $new\n"; } if ($ready){ # do NOT link in metadata files yet, except METS and MODS for etds $mylink = $linkbase.$whichdir; if ($level == 4){ # item level metadata push (@level4, "\t\t
    • $file
    • \n"); } elsif ($level == 3){ push (@level3, "\t\t
    • $file
    • \n"); } else{ print OUT "\t\t
    • $file
    • \n"; } } } else{ print "WARNING! $new exists, NOT overwriting with $old\n";} # note, if there is more than one jpg, png or gif, only the first found is saved }