){
if ($m =~ /Administrative Information *<\/h3>/){
push (@level1, $m);
$level = 2;
}
elsif ($m =~ /Collection Level Metadata *<\/h3>/){
push (@level2, $m);
$level = 3;
}
elsif ($m =~ /Metadata *<\/h3>/){
push (@level3, $m); # add these headers later
$itemMDExists = 1;
$level = 4;
}
elsif ($m =~ /Content *<\/h3>/){
push (@level4, $m); # skip level 5;
$level = 6;
}
elsif ($level == 1){ push(@level1, $m);}
elsif ($level == 2){ push(@level2, $m);}
elsif ($level == 3){ push(@level3, $m);}
elsif ($level == 4){ push(@level4, $m);}
elsif ($level == 6){ push(@level6, $m);}
}
close(MAN);
# now, start to rewrite it:
if (! $test){
open (OUT, ">".$manifest) or die "can't write to $manifest\n";
}
foreach (@level1){ print OUT $_;}
}
else{
if (! $test){ open(OUT, ">".$manifest) or warn "can't open $manifest to write.\n";}
$head = '
'.$title.' Manifest Page
'.$title.' '.$collnum.' Manifest Page
';
$tail = '
LOCKSS system has permission to collect, preserve, and serve this Archival Unit
';
print OUT $head;
}
# now, move other content from the admin folder
opendir(ADMIN, $admindir) or die "can't open $admindir\n";
if ($newbie){ print OUT " Administrative Information
\n";}
while ($file = readdir(ADMIN)){
if ($file =~ /^\./){ next;} # skip dot files
# very specific xml files accepted ONLY
if ($file =~ /$collnum\.xml/ || $file =~ /$collnum\.\d{1,2}\.xml/){ # accommodates collection number
# also collnum.2.xml, collnum.3.xml, etc
$old = $admindir."/".$file;
$new = $docdir."/".$file;
$val = 1;
if ( -e $new){
$val = `diff $old $new`;
}
if ($val){ # if they differ, keep the new one.
# do NOT link in the default file name, as the content may change
©this ($old, $new, $file, 0);
$version = 1;
($v = $file) =~ s,\.xml,\.v$version\.xml,; # add .v1 before extension
print "version will be $v\n";
$newv = $docdir."/".$v;
while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist
$version ++;
($v = $file) =~ s,\.xml,\.v$version\.xml,; # add .v1 before extension
print "version will be $v\n";
$newv = $docdir."/".$v;
}
# link in the v1 -- in later versions, look to see what version we're creating
©this ($old, $newv, $v, 1);
}
}
elsif ($file =~ /\.txt/ || $file =~ /\.rtf/ ){
$old = $admindir."/".$file;
$new = $docdir."/".$file;
# do NOT put these in LOCKSS
©this ($old, $new, $file, 0);
}
elsif ($file =~ /(\.png)/ || $file =~ /(\.jpg)/ || $file =~ /(\.gif)/ || $file =~ /(\.tif)/){
$newfile = $collnum.".icon".$1; #fred.jpg becomes u0003_0000002.icon.jpg
if ($newfile ne $file){
print " should we rename $file to $newfile?\n";
}
push (@iconlist, $admindir."/".$file); #$docdir.$newfile); # print this later
$old = $admindir."/".$file;
$new = $docdir."/".$file;
©this ($old, $new, $file, 0);
$version = 1;
($v = $file) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
$newv = $docdir."/".$v;
while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist
$version ++;
($v = $file) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
print "version will be $v\n";
$newv = $docdir."/".$v;
}
©this ($old, $newv, $v, 1);
}
else{
if ($file ne "Thumbs.db"){
print "NOT SAVING $collnum admin $file\n";
}
}
}
close (ADMIN);
if ($newbie){
print OUT "
\n";
}
else{
foreach (@level2){ print OUT $_;} # this prints the rest of the existing Admin data, and starts the
# collection level metadata section
}
# next, metadata
if (-e $oldMDdir){
undef $metsdir;
opendir(MD, $oldMDdir) or die "can't open $oldMDdir\n";
# if ($newbie){ print OUT " Collection Level Metadata
\n";}
while ($file = readdir(MD)){
if ($file =~ /^\./){ next;} # skip dot files
# watch out for item-level metadata.
if ($file =~ /(\.txt)/ || $file =~ /(\.xml)/){
$ext = $1;
# watch out for item-level metadata.
if ($file =~ /$collnum(\_\d{7}(\_\d{4}(\_\d{3})?)?)/){
# print "I think this could be item-level metadata.\n";
$myplace = $1;
# print "myplace = $myplace\n";
($dirpath = $myplace) =~ s,\_,\/,g; # trade underscores for slashes
$thisMDdir = $collbase.$dirpath."/Metadata/";
$thisMDdir =~ s,\/\/+,\/,g; # get rid of excess /
if (! -e $thisMDdir){ `mkdir -p $thisMDdir`;}
# $newfile = $collnum.$myplace.$ext;
$newfile = $file; # not changing the name of this file
# print "!!! item level metadata: $file going to $thisMDdir as $newfile\n";
$new = $thisMDdir.$file;
$old = $oldMDdir.$file;
undef $val;
if (-e $new){
$val = `diff $old $new`;
# print "Diff value -->$val<-- for $path versus $new\n";
}
if ($val){ # check file dates.
undef $dateAver; # archive version
undef $dateDver; # deposit version
open (AVER, $new) or die "can't look in $new\n";
# this will get the last recordChange date listed, if there is one
while ($line = ){
if ($line =~ /]*> *([^<]*) *<\/recordCreationDate>/){
$thisdate = $1;
if (!$dateAver){ $dateAver = $thisdate;}
elsif ($thisdate > $dateAver){ $dateAver = $thisdate;}
}
elsif ($line =~ /]*> *([^<]*) *<\/recordChangeDate>/){
$thisdate = $1;
if (!$dateAver){ $dateAver = $thisdate;}
elsif ($thisdate > $dateAver){ $dateAver = $thisdate;}
}
}
close(AVER);
open (DVER, $old) or die "can't look in $old\n";
while ($line = ){
if ($line =~ /]*> *([^<]*) *<\/recordCreationDate>/){
$thisdate = $1;
if (!$dateDver){ $dateDver = $thisdate;}
elsif ($thisdate > $dateDver){ $dateDver = $thisdate;}
}
elsif ($line =~ /]*> *([^<]*) *<\/recordChangeDate>/){
$thisdate = $1;
if (!$dateDver){ $dateDver = $thisdate;}
elsif ($thisdate > $dateDver){ $dateDver = $thisdate;}
push (@adates, $1);
}
}
close(DVER);
print "comparing deposit date of $dateDver and archive date of $dateAver\n";
if ($dateDver > $dateAver){ print "Deposited version of $file is newer than the archive version!\n";}
else{ print "Deposited version of $file is NOT newer than the archive version\n"; undef $val;}
}
if ((! -e $new) || ($val)){
# print "$new already exists! overwriting and versioning\n";
©this ($old, $new, $file, 0); # not linking these right now
$version = 1;
($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
$newv = $thisMDdir."/".$v;
while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist
$version ++;
($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
# print "version will be $v\n";
$newv = $thisMDdir."/".$v;
}
if ($myplace){ # item-level metadata
# print "at this point, myplace = $myplace\n";
©this ($old, $newv, $v, 1, 4); # last says put this on level 4 of manifest
}
else{ # collection level
©this ($old, $newv, $v, 1, 3);
}
}
}
# watch out for eads!!
elsif ($file =~ /($collnum\.ead)\.xml/){
$myname = $1;
# print MOVE "$oldMDdir/$file $newMDdir/$file\n";
if (! -e $newMDdir){ `mkdir -p $newMDdir`;}
$newfile = $myname.$ext;
$old = $oldMDdir."/".$file;
$new = $newMDdir."/".$newfile;
# check eadid
open (EAD, $old) or die "can't read $old\n";
undef @myead;
undef $found;
while ($line = ){
if ($line =~ / *<\/eadid>/){
$line = " $collnum\n";
$found = 1;
}
push (@myead, $line);
}
close(EAD);
if ($found){ # need to repair the file
open (EAD, ">".$old) or die "can't write to $old\n";
foreach (@myead){ print EAD $_;}
close(EAD);
}
if (-e $new){ print "ERROR! $newfile already exists!\n";}
else{
©this ($old, $new, $file, 0); # not linking these right now
}
$version = 1;
($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
$newv = $newMDdir."/".$v;
while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist
$version ++;
($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
print "version will be $v\n";
$newv = $newMDdir."/".$v;
}
©this ($old, $newv, $v, 1);
}
elsif ($file =~ /$collnum(\.\d{1,2})?/){ # if .1.txt
$added = $1;
# print MOVE "$oldMDdir/$file $newMDdir/$file\n";
if (! -e $newMDdir){ `mkdir -p $newMDdir`;}
$newfile = $collnum.$added.$ext; # retains added number
$old = $oldMDdir."/".$file;
$new = $newMDdir."/".$newfile;
if (-e $new){ print "ERROR! $newfile already exists!\n";}
else{
©this ($old, $new, $newfile, 0); # linking these right now
}
$version = 1;
($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
$newv = $newMDdir."/".$v;
while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist
$version ++;
($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
print "version will be $v\n";
$newv = $newMDdir."/".$v;
}
©this ($old, $newv, $v, 1);
}
else{ #renaming it as collection metadata. Beware! if more than one file... extra versions
$newfile = $collnum.$ext; #descriptiveMetadata.txt becomes u0003_0000002.txt
$old = $oldMDdir."/".$file;
$new = $newMDdir."/".$newfile;
if (-e $new){ print "ERROR! $newfile already exists!\n";}
else{ ©this ($old, $new, $newfile, 0);}
if ($newfile ne $file){print "just renamed $file to $newfile, but not linking it\n";}
else{ # we want to link a version of this
$version = 1;
($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
$newv = $newMDdir."/".$v;
while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist
$version ++;
($v = $newfile) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
print "version will be $v\n";
$newv = $newMDdir."/".$v;
}
©this ($old, $newv, $v, 1);
}
}
}
# what if METS comes before collection-level metadata file?
# how do we manage that in the manifest writing?
# save it, do it after closing the Admin section
elsif ($file =~ /^METS/){
$metsdir = $oldMDdir.$file;
}
else{
print "NOT SAVING $collnum metadata $file\n";
}
}
close (MD);
if (@level3){
if ($newbie){
print OUT " Collection Level Metadata
\n";
}
foreach (@level3){ print OUT $_;} # prints existing coll-level metadata -- then start of next section
if ($newbie){print OUT "
\n";}
}
# if (@level4){
# if ($newbie){ print OUT " Metadata
\n";}
# foreach (@level4){ print OUT $_;}
# if ($newbie){ print OUT "
\n";}
# }
if ($metsdir){ # SECTION FOR METS FILES
opendir(METS, $metsdir) or die "can't open $metsdir\n";
if ((! $itemMDexists) || ($newbie)){ print OUT " METS Metadata
\n";}
while ($m = readdir (METS)){
if ($m =~/^(.*)\.mets\.xml/){
$id = $1;
$version = 1;
$v = $id.".mets.v$version.xml";
if ($id =~ /^([a-z]{1}[\d]{4})\_([\d]{7})\_([\d]{7})(\_[\d]{4}(\_[\d]{3})?)?$/){
if (($1 ne $topdir || $2 ne $secdir) || (($thirdir) && $3 ne $thirdir)){
print "FILENAME ERROR: $id in $metsdir\n";
next;
}
@parts = split ("_", $id);
$old = $metsdir."/".$m;
$where = join("/",@parts); # create directory structure needed
$mydir = $outbase.$where."/Metadata";
# print MOVE "$metsdir/$m $mydir/$m\n";
if (! -e $mydir){ `mkdir -p $mydir`;} # make my directory if it does not exist
#$mylink = "http://libcontent1.lib.ua.edu/lockss/".join("/",@parts)."/Metadata/".$m;
#HEREERERERER
$newv = $mydir."/".$v;
while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist
$version ++;
$v = $id.".mets.v$version.xml";
print "version will be $v\n";
$newv = $mydir."/".$v;
}
©this ($old, $newv, $v, 1, 4);
$newme = $mydir."/".$m;
©this ($old, $newme, $m, 0);
$newme = $mydir.$m;
}
}
}
close (METS);
} # end of if METS dir
} # end of old MD directory
if (@level4){
print "I have item level metadata!\n";
if ($newbie){ print OUT " Metadata
\n";}
foreach (@level4){ print OUT $_;}
if ($newbie){ print OUT "
\n";}
}
else{ print "NO ITEM LEVEL METADATA!\n";}
if ($transcriptsExist){
print OUT " Transcripts
\n";
}
# if, in the scans directory, there is a Transcriptions subdirectory,
# we need to consider these as items for storage.
undef @transdirs;
if (-e $oldTrans){
if (($newbie) || (!$transcriptsExist)){ print OUT " Transcripts
\n";}
push (@transdirs, $oldTrans);
foreach $tr (@transdirs){
opendir(TRANS, $tr) or die "can't open $tr\n";
while ($file = readdir(TRANS)){
if ($file =~ /^\./){ next;} # skip dot files
# print "found $file\n";
$this = $tr."/".$file;
if (-d $this){
if ($this =~ /\d{4}\_\d{7}/){ # a directory containing compound
push (@transdirs, $this);
next;
}
}
else{ # not a directory # note: this ignores pdfs or other derivatives!
if ($file =~ /^(.*)(\.tif)$/ || $file =~ /^(.*)(\.txt)$/ || $file =~ /^(.*)(\.xml)$/ ){
#TEST FORM OF FILENAME!!
$id = $1;
$ext = $2;
if ($id =~ /^([a-z]{1}[\d]{4})\_([\d]{7})\_([\d]{7})(\_[\d]{4}(\_[\d]{3})?)?$/){
$one = $1;
$two = $2;
$three = $3;
$four = $4;
$five = $5;
$where = $one."/".$two."/Transcripts/".$three."/".$four."/".$five;
if (($one ne $topdir || $two ne $secdir) || (($thirdir) && $three ne $thirdir)){
print "FILENAME ERROR: $id in $tr\n";
next;
}
$where =~ s,\_,,g;
#HERE
# @parts = split ("_", $id);
$old = $tr."/".$file;
# $where = join("/",@parts); # create directory structure needed
$path = $outbase."/".$where;
$path =~ s,\/\/+,\/,g; # remove excess /
# $path =~ s/$collbase/$collbase\/Transcripts\//; # put in the transcripts directory
# print "Is this the correct transcripts directory for $file? \n$path\nY or N\n\n";
# $answ = ;
# if ($answ =~ /n/i){
#TRANS: print "give me the correct path, please: \n\n";
# $answ = ;
# chomp ($answ);
# print "Is this the correct transcripts directory for $file? \n$path\nY or N\n\n";
# $answ = ;
# if ($answ =~ /n/i){ goto TRANS;}
# else{ $path = $answ;}
# }
if ( ! -e $path){ `mkdir -p $path`;}
$new = $path."/".$file;
if ($ext ne ".tif"){ # no versions on tifs and wav files
©this ($old, $new, $file, 0);
$version = 1;
($v = $file) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
$newv = $path."/".$v;
while ( -e $newv){ # keep incrementing version # till you find one that doesn't exist
$version ++;
($v = $file) =~ s,(\.[a-z]{3})$,\.v$version\1,; # add .v1 before extension
print "version will be $v\n";
$newv = $path."/".$v;
}
# old file, new file, what to link, filename with extension, 1= linkit
©this ($old, $newv, $v, 1);
# linking in version
# not linking unversion
}
else{ # if a tiff, and it's not already there, link the actual thing, no versioning
if (-e $new){ print "ERROR! $new already exists!\n";}
else{
©this ($old, $new, $file, 1); # linking these right now
}
}
}
else{ print "FILENAME ERROR: $id in $tr\n";}
}
else { # text files? xlsx? OCR? Transcripts? collection level or item level metadata?
if ($file =~ /Thumbs.db/){ next;}
print "OTHER FILES IN TRANSCRIPTS: $file in $tr\n";
}
} # end of not a directory in transcripts
}# end of this transcript directory
close(TRANS);
}
if (($newbie) || (!$transcriptsExist)){ print OUT "
\n";}
} # end if there's transcripts
foreach (@level5){ print OUT $_;} # no level5 if no existing transcripts
# if we find OCR, we need to do something similar: Text/OCR
# we need to find all scans directories, and there may be several, with various
# addendums such as Scans_check1, Scans_Online, RandomScans
print OUT " Content
\n"; # assuming everything has content
foreach $sd (@scandirs){
opendir (SD, $sd) or die "can't open $sd\n";
while ($file = readdir(SD)){
if ($file =~ /^\./){ next;} # skip dot files
# print "found $file\n";
$this = $sd."/".$file;
if (-d $this){
if ($this =~ /\d{4}\_\d{7}/){ # a directory containing compound
push (@scandirs, $this);
next;
}
elsif ( $file =~ /Transcript/i){
# put these in transcript directory on item level?
print "TRANSCRIPTS found in $sd\n";
}
elsif ($file =~ /OCR/i){
print "OCR found in $sd\n";
# put these in OCR directory on item level?
}
else{ print "UNKNOWN DIR: $this\n";}
}
else{ # not a directory
if ($file =~ /^(.*)(\.[\w]{2,5})$/ || $file =~ /^(.*)(\.[\w]{2,5})$/){
#TEST FORM OF FILENAME!!
$id = $1;
$ext = $2;
if ($id =~ /^([a-z]{1}[\d]{4})\_([\d]{7})\_([\d]{7})(\_[\d]{4}(\_[\d]{3})?)?$/){
if (($1 ne $topdir || $2 ne $secdir) || (($thirdir) && $3 ne $thirdir)){
print "FILENAME ERROR: $id in $sd\n";
next;
}
@parts = split ("_", $id);
$old = $sd."/".$file;
$where = join("/",@parts); # create directory structure needed
$path = $outbase.$where;
if ( ! -e $path){ `mkdir -p $path`;}
$new = $path."/".$file;
copythis ($old, $new, $file, 1);
}
else{ print "FILENAME ERROR: $id in $sd\n";}
}
else { # text files? xlsx? OCR? Transcripts? collection level or item level metadata?
if ($file =~ /Thumbs.db/){ next;}
print "OTHER FILES IN SCANS: $file in $sd\n";
}
} # end of not a directory in scans
}# end of this scan directory
} # end of scan directories
if ($newbie){
print OUT "
\n";
print OUT $tail;
}
else{ foreach (@level6){ print OUT $_;}}
if (!$test){close OUT;}
} # end of whether the file found matches the pattern
} # end of the directory
close(BASE);

# ---------------------------------------------------------------------------
# Parent-manifest pass.
#
# %makeMan  : parent manifest path => { collection id => title } for parent
#             manifests that need to be created (only reported here).
# %alterMan : parent manifest path => { collection id => title } for parent
#             manifests that are rewritten below with any missing entries.
#
# Outside test mode, "parentMans" receives a copy of every rewritten manifest.
# ---------------------------------------------------------------------------
if (! $test){
    open (OUT, '>', 'parentMans') or die "can't write to parentMans\n";
}

# manifest time
@makethese = keys (%makeMan);
@alterthese = keys (%alterMan);

# CHANGE THIS TO SUIT WHAT IS NEEDED
# NOTE(review): $mhead is assigned here but not read anywhere in this section.
$mhead = '
University of Alabama Libraries (Born Digital) University Research u0015 Manifest Page
University of Alabama ';
$mtail = '
LOCKSS system has permission to collect, preserve, and serve this Archival Unit
';

if (@makethese){
    print "\n\nNEED TO CREATE MANIFESTS: \n";
    foreach my $m (@makethese){
        if (-e $m){ print "WHY MAKE $m? IT EXISTS!\n"; next;}
        print " $m \n";
        my %kids = %{$makeMan{$m}};
        foreach my $c (sort keys (%kids)){
            (my $p = $c) =~ s,\_,\/,g; # underscores become forward slashes for path
            # NOTE(review): $mylink is computed but does not appear in the string
            # printed below -- confirm whether the entry was meant to carry the link.
            $mylink = $linkbase."/".$p."/Documentation/Manifest.html";
            print " - ".$kids{$c}." $c Manifest
\n";
        }
        print "\n";
        # Was `!$m =~ /\d{7}/`, which parses as `(!$m) =~ /\d{7}/` and is never
        # true; `!~` is the negated match that condition was reaching for.
        if ($m !~ /\d{7}/){
            push (@thesetoo, $m);
        }
    }
}

if (@alterthese){
    print "\n\nNEED TO ALTER MANIFESTS: \n";
    # A lexical loop variable is used instead of $a, which sort() treats specially.
    foreach my $aman (@alterthese){
        if (! $aman){ next;}
        my @gotthese;   # collection ids already listed in this manifest
        my @rewrite;    # lines of the manifest up to (not including) the </ul> line
        print " $aman \n";

        open (AMAN, '<', $aman) or die "can't read $aman\n";
        while (my $line = <AMAN>){
            # Everything from the closing </ul> onward is dropped; $mtail is
            # appended in its place when the manifest is written back.
            last if ($line =~ /<\/ul>/);
            if ($line =~ /- .* ([a-z]{1}\d{4}\_\d{7})(\_\d{7})? /){
                # the second group is optional, so it may be undefined
                push (@gotthese, $1.(defined $2 ? $2 : ""));
            }
            push (@rewrite, $line);
        }
        close (AMAN);

        my %kids = %{$alterMan{$aman}};
        foreach my $c (sort keys (%kids)){
            (my $p = $c) =~ s,\_,\/,g; # underscores become forward slashes for path
            # NOTE(review): as above, $mylink is not part of the visible entry text.
            $mylink = $linkbase."/".$p."/Documentation/Manifest.html";
            my $found = grep { $_ eq $c } @gotthese;
            if (! $found){
                push (@rewrite, "
- ".$kids{$c}." $c Manifest
\n");
                print " - ".$kids{$c}." $c Manifest
\n";
            }
            # Reports the collection id itself; the previous message interpolated
            # the finished inner loop's variable, which is empty at this point.
            else { print "found $c in $aman already!!!\n";}
        }

        # NOTE(review): this rewrite is not guarded by $test, unlike the open of
        # parentMans above -- confirm that altering manifests in test mode is intended.
        open (AMAN, '>', $aman) or die "can't write to $aman\n";
        foreach (@rewrite){ print AMAN $_;}
        print AMAN $mtail;
        close(AMAN);

        print OUT "\n\n -- --- ALTERED -----\n\n";
        foreach (@rewrite){ print OUT $_;}
        print OUT $mtail;
        print "\n";
    }
}
close (OUT);
$dbh->disconnect();

# Manifests collected in @thesetoo are reported for manual follow-up.
@sorted = sort (@thesetoo);
if (@sorted){
    print "\n\nALTER TOP LEVEL MANIFEST TO INCLUDE THESE!\n";
    foreach (@sorted){ print " $_\n";}
}
exit;
# copythis(old, new, file, ready [, level])
#
#   old   - path of the file to copy
#   new   - destination path, including the name the copy gets
#   file  - bare filename (with extension) used as the manifest entry text
#   ready - true: add a manifest entry for the copy; false: copy only
#   level - optional; 4 queues the entry on @level4, 3 on @level3,
#           anything else (or absent) prints it straight to OUT
#
# The arguments are assigned to the package globals $old, $new, $file, $ready
# and $level (no `my`), so every call overwrites those variables for the
# caller as well. $whichdir and $mylink are likewise set globally.
# A "old new" line is always written to MOVE; the actual copy is skipped
# when $test is set.
sub copythis {
    ($old, $new, $file, $ready, $level) = @_;
    print "NEW: $new\nFILE: $file\n";

    # Link path = destination minus the archive root, with a doubled slash
    # after "Metadata" collapsed.
    $whichdir = $new;
    $whichdir =~ s,\/srv\/archive,,; # oops, don't want this in link
    $whichdir =~ s,Metadata\/\/,Metadata\/,;
    print "DIR: $whichdir\n";

    print MOVE "$old $new\n";
    unless ($test) {
        copy($old, $new) or die "cannot copy $old to $new\n";
    }

    if ($ready) {
        $mylink = $linkbase . $whichdir;
        print "MYLINK: $mylink\n\n";

        my $entry = "\t\t- $file
\n";
        # Levels 4 and 3 are held back in arrays for the caller to print
        # later; every other entry goes to the manifest immediately.
        my $queue = $level == 4 ? \@level4
                  : $level == 3 ? \@level3
                  :               undef;
        if ($queue) {
            push(@$queue, $entry);
        }
        else {
            print OUT $entry;
        }
    }
}