#!/usr/bin/perl use File::Copy; use Time::Local; use DBI; # moveAudioContent # jody DeRidder, 5/27/10 ## Copyright (c) 2010, The University of Alabama Libraries. ## Contributed by Jody DeRidder, 6/10/10. ## All rights reserved. ## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: ## * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. ## * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in ## the documentation and/or other materials provided with the distribution. ## * Neither the name of The University of Alabama Libraries nor the names of its contributors may be used to endorse or promote products ## derived from this software without specific prior written permission. ##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ##THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR ##CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ##PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ##LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, ##EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # this script will copy content from the share drive to the deposits directory # it will compare that content to what is in the directory on the share drive # if there's no difference, it will delete what's on the share drive # this incorporates cleanCollInfo to check the admin file, if there is one # and collToDbase to get it into the database (InfoTrack.allColls) as well. # expects share drive content under /cifs-mount/ # requests collection directory on share drive # locates all Scans directories and Transcript directories ×tamp; # location hardcoded here: $inbase = "/cifs-mount/Digital_Coll_Complete/"; $outbase = "/srv/deposits/content/"; $archive = "/srv/archive/"; $acumen = "/srv/www/htdocs/content/"; $icons = "/srv/scripts/collstuff/icons"; $icondir = "/srv/www/htdocs/digital/images/"; $iconURLbase = "http://libcontent1.lib.ua.edu/digital/images/"; open (ICONS, $icons) or die "can't open $icons\n"; while ($line = ){ chomp $line; if ($line =~ /^(.+)\.icon\.png/){ $collnum = $1; $mypic{$collnum} = $line; } } close (ICONS); %typelist = ("image" => "image.icon.png", "text" => "text.icon.png", "mixedmedia" => "mixedmedia.icon.png", "audio" => "audio.icon.png", "video" => "video.icon.png", "book" => "book.icon.png", "score" => "score.icon.png", "findingaid" => "mss.icon.png", "other" => "other.icon.png" ); $date = `date`; $yr = substr($date,24,4); $mon = substr($date,4,3); $day = substr($date,8,2); $day =~ s, ,0,; #if ($day <10){$day = "0".$day;} if ($mon eq "Jan"){$month = "01";} elsif ($mon eq "Feb"){$month = "02";} elsif ($mon eq "Mar"){$month = "03";} elsif ($mon eq "Apr"){$month = "04";} elsif ($mon eq "May"){$month = "05";} elsif ($mon eq "Jun"){$month = "06";} elsif ($mon eq "Jul"){$month = "07";} elsif ($mon eq "Aug"){$month = "08";} elsif ($mon eq "Sep"){$month = "09";} elsif ($mon eq "Oct"){$month = "10";} elsif ($mon eq "Nov"){$month = "11";} else{$month = "12";} $today = "$yr$month$day"; $hostname = "localhost"; $port = "3306"; $user = "user"; $password = "password"; $database = "InfoTrack"; $dbh = DBI->connect("DBI:mysql:$database:$hostname:$port", $user, $password) or die "can't connect to database: ",$DBI::errstr,"\n"; $h->{PrintError} = 1; $h->{RaiseError} = 1; # use this to make collection link $acumenStart = "http://acumen.lib.ua.edu/c/"; # this to discriminate between digital and analog in the database $D = $dbh->quote("D"); $A = $dbh->quote("A"); # what are we working? opendir(BASE, $inbase) or die "can't open $inbase\n"; while ($file = readdir(BASE)){ # print "looking at $file\n"; if ($file =~ /^\./){next;} # skip dot files $path = $inbase.$file; if (-d $path){ push (@dirs, $file); } } close(BASE); print "\n\nI'm looking through $inbase. \n Here are the directories there. \n\n"; $dircount = scalar @dirs; for ($i = 1; $i <= $dircount; $i ++){ print $i.") ".$dirs[$i-1]."\n"; } print "\nWhich directory do you want? \n Type the number and press enter:\n\n"; $num = ; chop $num; $num --; $inbase = $inbase.$dirs[$num]."/"; undef $here; while (!$here){ print "\nI'm looking through $inbase; \nis this the collection directory?\n Y or N: \n\n"; $ans = ; chomp ($ans); if ($ans =~ /Y/i){ $here = 1; last; } else{ print "\nShould I go back up a directory level?\n Y or N: \n\n"; $ans = ; chomp ($ans); if ($ans =~ /Y/i){ $inbase =~ s,^(.*\/).*?\/$,\1,; next; } else{ print "\nPlease choose one of the following directories: \n\n"; undef @dirs; opendir(BASE, $inbase) or die "can't open $inbase\n"; while ($file = readdir(BASE)){ if ($file =~ /^\./){next;} # skip dot files $path = $inbase.$file; if (-d $path){ push (@dirs, $file); } } close(BASE); $dircount = scalar @dirs; for ($i = 1; $i <= $dircount; $i ++){ print $i.") ".$dirs[$i-1]."\n"; } print "\n\n Type the number and press enter:\n"; $num = ; chop $num; $num --; $inbase = $inbase.$dirs[$num]."/"; } } } # now we are in the correct collection directory # check for the collection number first undef $gotit; if ($inbase =~ /([a-z]{1}[\d]{4}\_[\d]{7})/){ $collnum = $1; } else{ print "\nPlease enter the collection number\n"; $collnum = ; chomp ($collnum); } while (!$gotit){ print "\nIs $collnum the correct collection number? Y or N:\n"; $ans = ; chomp ($ans); if ($ans =~ /N/i){ print "\nPlease enter the collection number\n"; $collnum = ; chomp ($collnum); } else{ $gotit = 1; last; } } # let's check for an existing admin xml file, and also for an EAD for this coll num ($mydirs = $collnum) =~ s,\_,\/,g; # replace underscores with slashes to find the directory $oldadmin = $archive.$mydirs."/Documentation/".$collnum.".xml"; $onlineMD = $acumen.$mydirs."/Metadata"; $onlineadmin = $onlineMD."/".$collnum.".xml"; $outbase .= $collnum."/"; # collection directory in /srv/deposits, named for collection number if (! -e $outbase){ `mkdir -m 0755 $outbase`; } if (! -e $onlineMD){ `mkdir -m 0775 -p $onlineMD`; } # set up directory for collection file if (-e $oldadmin){ open (OLD, $oldadmin) or die "can't look at $oldadmin\n"; undef $/; # let's slurp in the whole file $oldAdminFile = ; close(OLD); $/ = "\n"; # reset to recognize end of line } # it's possible this collection went online since we put content in storage. # check for admin file next in online directory elsif (-e $onlineadmin){ open (OLD, $onlineadmin) or die "can't look at $onlineadmin\n"; undef $/; # let's slurp in the whole file $oldAdminFile = ; close(OLD); $/ = "\n"; # reset to recognize end of line } # next let's see what we have online already for this collection. $id = $dbh->quote($collnum); # first look for digital collection entry #print "select title, online, inAcumen, listLevel, parentID, mssUrl from allColls where id_2009 like $id and AnalogOrDigital like $D\n"; $sth = $dbh->prepare("select title, alphaBy, online, inAcumen, listLevel, blurb, type, parentID, sourceCollName, mssNum, mssUrl from allColls where id_2009 like $id and AnalogOrDigital like $D") or die "can't prepare select for $collnum to see if it's up: ", $dbh->errstr(),"\n"; $sth->execute() or die "can't select to see if $collnum is up: ", $dbh->errstr(),"\n"; ($atitle, $alpha, $online, $inAcumen, $ll, $oldblurb, $atype, $pid, $asource, $mssnum, $murl) = $sth->fetchrow_array(); warn "Problem in fetchrow_array(): ",$sth->errstr(),"\n" if $sth->err(); $sth->finish(); # now look for analog (EAD) collection entry print "select title, online, listLevel, blurb from allColls where id_2009 like $id and AnalogOrDigital like $A\n"; $sth = $dbh->prepare("select title, online, listLevel, blurb from allColls where id_2009 like $id and AnalogOrDigital like $A") or die "can't prepare select for analog $collnum to see if it's up: ", $dbh->errstr(),"\n"; $sth->execute() or die "can't select to see if analog $collnum is up: ", $dbh->errstr(),"\n"; ($ptitle, $ponline, $pll, $pblurb) = $sth->fetchrow_array(); warn "Problem in fetchrow_array(): ",$sth->errstr(),"\n" if $sth->err(); $sth->finish(); # this takes the analog listing offline except for as a link from the digital collection listing, # to avoid duplicate entries in the online collection list... if ($ptitle){ $parentID = $collnum;} #now let's look for the ead $ead = $acumen.$mydirs."Metadata/".$collnum.".ead.xml"; if (-e $ead){ #figure out the MS number. $eadUrl = "http://acumen.lib.ua.edu/".$collnum; if ($collnum =~ /^[a-z]{1}\d{4}\_(\d{7})/){ $msnum = "MS ".$1 + 0; # remove leading zeros } else{ print "ERROR: Unable to determine MS number from $collnum, but there IS an EAD online.\n"; print "please enter in the MS number value for $eadUrl:\n"; $msnum = ; chomp $msnum; if ($msnum =~ /(\d+)/){ $msnum = "MS ".$1;} } open (EAD, $ead) or die "can't read $ead\n"; while ($line = ){ chomp $line; if ($line =~ /<\/archdesc>/){last;} # only parse header $bigline .= $line; } close (EAD); if ($bigline =~ /]*>(.*?)<\/abstract>/i){ $abstract = &cleanup($1); } } # output file will be based on collection number and timestamp $output = "/home/jeremiah/UploadArea/output/Storing_".$collnum."_".$timestamp.".txt"; $out = "UploadArea/output/Storing_".$collnum."_".$timestamp.".txt"; open (OUT, ">".$output) or die "can't open $output\n"; $admindir = $inbase."/Admin/"; $collfile = $admindir.$collnum.".xml"; $anicon = $admindir.$collnum.".icon.png"; # let's check the icons # if not submitted, and not in our list, this will be assigned by type later if ($mypic{$collnum}){ $iconURL = $iconURLbase.$collnum.".icon.png"; if (-e $anicon){ print "\n\nThis collection already has an icon: ".$iconURLbase.$collnum.".icon.png\n\n"; print "Is the one in this collection's admin directory a better replacement? Y or N:\n\n"; $ans = ; if ($ans =~ /Y/i){ $new = $icondir.$collnum.".icon.png"; copy ($anicon, $new); } else{ print "Please remove it, and hit enter when that is done, so we can continue:\n\n"; $ans = ; } } } elsif ( -e $anicon){ # a new icon is being submitted $mypic{$collnum} = $collnum.".icon.png"; # add it to my icons list open (ICONS, ">>".$icons) or die "can't add new image to $icons\n"; print ICONS $collnum.".icon.png\n"; close(ICONS); # and copy it live $new = $icondir.$collnum.".icon.png"; copy ($anicon, $new); if (-e $new){ unlink $anicon;} # deletes local copy $iconURL = $iconURLbase.$collnum.".icon.png"; } # next, the collection xml file in the admin folder if (! -e $collfile && ! -e $oldAdminFile && ! $atitle){ print "\n\nWe need a collection xml file in the admin directory before we can continue.\n\n"; print "Please create one, and then come back and run this script again\n\n"; sleep(5); exit; } elsif($atitle){ print "\n\nWe have information in the database for this collection.\n"; print "\nIf this needs changing then please ensure that your collection xml file\nwill suffice:\n\n"; print "TITLE: $atitle\nAlphaBy: $alpha\nTYPE: $atype\nSOURCE: $asource\nManuscript Number: $mssnum\n"; print "FINDING AID LINK: $murl\nDESCRIPTION: $oldblurb\n\n"; if ($pblurb){ print "\tAlso, we have the following description for the parent collection.\n\tIf the following description is an improvement\n"; print "\tover what is in your admin XML file, please use this instead in your file: \n\n$pblurb\n\n"; } print "\tIf you have a collection xml file, and it does NOT improve on this, please delete it.\n"; print "\tPress enter when you're ready to continue: \n\n"; $ans = ; } if (-e $collfile){ if ($oldAdminFile){ print "We already have the following admin file for this collection:\n\n$oldAdminFile\n\n"; } print "\n\nIs the admin xml file in your directory new or altered? Y or N\n"; $ans = ; chomp ($ans); if ($ans =~ /Y/i){ $admin = 1; } elsif ($ans =~ /N/i){ print "Then please remove it from the directory now.\n We don't need duplicates in the archive.\n\n"; print "Press enter when you have done this, and we'll continue.\n\n"; $ans = ; } } # $admin equals 1 when the admin file being submitted is new or changed. if ($admin == 1){ # this comes from the cleanCollInfo script in the /srv/scripts/qc directory # cleaning up the file a bit, removing Windows encodings and empty tags. # also checking the type field and alphaBy, Manuscript number and finding aid link open(INFO, $collfile) or die "can't open $collfile\n"; undef @thisfile; undef $xml; undef $parentstart; undef $parentend; undef $title; undef $mssUrl; undef $alphaBy; undef $type; undef $source; undef $blurb; undef $mssNum; undef $me; undef $bigLine; while ($line = ){ chomp $line; &cleanup($line); $bigLine .= $line." "; # there may be newlines in the middle of fields. Get it all together. } # got all the info close(INFO); $bigLine =~ s, +, ,g; # get rid of multiple spaces, added to keep lines from running together. push (@thisfile, ''."\n"); push (@thisfile, "\n"); if ($bigLine =~ / *(.+) *<\/Digital\_Collection\_Name>/){ $title = $1; # print "found TITLE: $title in $collfile\n"; # $title =~ s,^ ?"(.*?)" ?$,$1,; # excessive quotes; same thing below push (@thisfile, " ".$title."\n"); } else{ $ans = "n"; while ($ans =~ /n/i){ print "ERROR: no title found in admin xml file. Please enter the collection title:\n\n"; $title = ; chomp $title; print "Is this the correct title? \"$title\" --> Y or N\n\n"; $ans = ; } push (@thisfile, " ".$title."\n"); } if ($bigLine =~ / *(.+) *<\/Alphabetized\_By>/){ $alphaBy = $1; $alphaBy =~ s,^ ?"(.*?)" ?$,$1,; push (@thisfile, " ".$alphaBy."\n"); } else{ $ans = "n"; while ($ans =~ /n/i){ print "ERROR: no alphaBy value found in admin xml file. \nPlease enter what the title should be alphabetized by:\n\n"; $alphaBy = ; chomp $alphaBy; print "Is this correct? \"$alphaBy\" --> Y or N\n\n"; $ans = ; } push (@thisfile, " ".$alphaBy."\n"); } $types = ( "1" => "book", "2" => "image", "3" => "text", "4" => "audio", "5" => "video", "6" => "mixed media", "7" => "finding aid", "8" => "score", "9" => "other" ); if ($bigLine =~ / *(.+) *<\/Type\_Of\_Content>/){ $type = lc($1); if ($type eq "book" || $type eq "image" || $type eq "text" || $type eq "audio" || $type eq "video" || $type eq "mixed media" || $type eq "finding aid" || $type eq "score" || $type eq "other"){ push (@thisfile, " ".$type."\n"); } else{ $ans = "n"; while ($ans =~ /n/i){ print "ERROR: type of content must be one of the following instead of $type.\n Please select the correct number:\n\n"; GETTYPE: while (($num, $val) = each (%types)){ print "$num. $val\n"; } print "\n"; $num = ; chomp ($num); if ($num =~ /(\d)/){ $num = $1;} # pull out the first number found, and only that. $type = $types{$num}; print "Is this correct? \"$type\" -->Y or N\n\n"; $ans = ; } push (@thisfile, " ".$type."\n"); } } else{ print "ERROR: we need a type of content.\n Please select the correct number:\n\n"; goto GETTYPE; } if ($bigLine =~ / *(.+) *<\/Analog\_Collection\_Name>/){ $source = $1; push (@thisfile, " ".$source."\n"); } if ($bigLine =~ / *(.+) *<\/Manuscript\_Number>/){ $mssNum = $1; if ($msnum && ($mssNum ne $msnum)){ print "ERROR: the MS number in the file does not match the MS number in $eadUrl\n"; print "Which is correct? \n1. $mssNum\n2. $msnum\n\nInput 1 or 2 please:\n\n"; AGAIN: $ans = ; if ($ans =~ /2/){ $mssNum = $msnum; } elsif (!($ans =~/1/)){ print "Please enter either 1 or 2:\n\n"; goto AGAIN; } } push (@thisfile, " ".$mssNum."\n"); } elsif ($msnum){ push (@thisfile, " ".$msnum."\n"); $mssNum = $msnum; # for database input } if ($bigLine =~ / *(.+) *<\/Finding\_Aid\_Link>/){ $mssUrl = $1; if ($eadUrl && ($eadUrl ne $mssUrl)){ push (@thisfile, " ".$eadUrl."\n"); $mssUrl = $eadUrl; # for database } else {push (@thisfile, " ".$mssUrl."\n");} } elsif ($eadUrl){ push (@thisfile, " ".$eadUrl."\n"); $mssUrl = $eadUrl; # for database } if ($bigLine =~ / *(.+) *<\/Digital\_Collection\_Description>/){ $blurb = $1; # $blurb=~ s,^ ?"(.*?)" ?$,$1,; push (@thisfile, " ".$blurb."\n"); } elsif ($abstract){ # pulled from EAD abstract push (@thisfile, " ".$abstract."\n"); $blurb = $abstract; } else{ $ans = "N"; while ($ans =~ /n/i){ print "\n\nPlease enter a short description for this collection, then hit enter:\n\n"; $blurb = ; chomp $blurb; print "\n\nIs this correct? Y or N:\n\n$blurb\n\n"; $ans = ; } push (@thisfile, " ".$blurb."\n"); } push (@thisfile,"\n"); if (!$title){ die "ERROR, no title from $collfile\n$bigLine\n\n";} unlink $collfile; # if I overwrite with a shorter file, the last of the previous one remains; delete it instead. open (INFO, ">".$collfile) or die "can't write to $collfile\n"; foreach (@thisfile){ print INFO $_;} close (INFO); $outfile = $onlineadmin; print "copying $collfile live into Acumen as $outfile\n"; copy($collfile, $outfile); # next section comes from /srv/scripts/collToDbase # if already online and this is a correction, correct the database # if already online and no correction needed, do nothing # if not already online, add info to online database to make this live in the collection list if (!$iconURL){ $icon = $typelist{$type}; if (! $icon){ die "ERROR: no icon for type $type\n";} $iconURL = $iconURLbase.$icon; } $icon = $dbh->quote($iconURL); $aUrl = $acumenStart.$collnum; $url = $dbh->quote($aUrl); if ($mssUrl < 1){ if ($ptitle && $ponline){ $mssUrl = "http://acumen.lib.ua.edu/$collnum";} else { $mssUrl = NULL;} } $live = $dbh->quote(1); $inA = $dbh->quote(1); $id = $dbh->quote($collnum); $t = $dbh->quote($title); $a = $dbh->quote($alphaBy); $tp = $dbh->quote($type); $s = $dbh->quote($source); $mnum = $dbh->quote($mssNum); $murl = $dbh->quote($mssUrl); $desc = $dbh->quote($blurb); $now = $dbh->quote($today); $listLevel = $dbh->quote("1"); if (! $parentID){ # if this is a u0003 file, the parentID is the same as its own if ($id =~ /u0003\_/){ $mom = $id;} else{ $mom = NULL;} } else{ $mom = $dbh->quote($parentID); } if ($atitle){ # this is already online, we're updating the entry $sth = $dbh->prepare("update allColls set online = $live, inAcumen=$inA, cannedLink=$url, title=$t, alphaBy = $a, type=$tp, sourceCollName=$s, listLevel = $listLevel, parentID = $mom, blurb=$desc, mssNum=$mnum, mssUrl = $murl, iconLocation=$icon where id_2009 like $id and AnalogOrDigital like $D") or die "can't update $collnum digital collection in database : ", $dbh->errstr(),"\n"; $sth->execute() or die "can't update $collnum digital collection in database: ", $dbh->errstr(),"\n"; $sth->finish(); } else{ # this is a new entry $sth = $dbh->prepare("insert into allColls (dnum, id_2009, title,listLevel, parentID,sourceCollName, mssNum, mssUrl, blurb, type, alphaBy, AnalogOrDigital, online, inAcumen, iconLocation, cannedLink, dateLive) values (NULL,$id,$t,$listLevel,$mom,$s,$mnum,$murl,$desc,$tp,$a,$D,$live,$inA,$icon,$url, $now)") or die "can't prepare insert for $id: ",$dbh->errstr(),"\n"; $sth->execute() or die "Can't insert into lookup $id : ", $sth->errstr(),"\n"; $sth->finish(); } if ($ptitle && $ponline){ if ($pll == 1){ # digital ranks over analog for collection list, so make this null $sth = $dbh->prepare("update allColls set listLevel = NULL where id_2009 like $id and AnalogOrDigital like $A") or die "can't change listLevel to NULL for analog $id: ", $dbh->errstr(),"\n"; $sth->execute() or die "can't execute change of listLevel for $id analog: ", $dbh->errstr(),"\n"; $sth->finish(); } } # let's copy this to storage $outadmin = $outbase."/Admin/"; $outadminfile = $outadmin.$collnum.".xml"; if (! -e $outadmin){ `mkdir -m 0755 $outadmin`;} copy ($collfile, $outadminfile); if (-e $outadminfile){ unlink $collfile;} #deletes local copy } # end if there is an admin file, and it's an improvement over what we have, or a newbie $dbh->disconnect(); # anything else in the admin directory? if (-e $admindir){ open (ADMIN, $admindir) or die "can't look in $admindir for other things\n"; while ($file = readdir(ADMIN)){ if ($file =~ /^\./ || $file =~ /thumbs\.db/i){ next;} $old = $admindir.$file; $new = $outadmin.$file; copy ($old, $new); $val = `diff $old $new`; if ((-e $new) && (! $val)){ unline $old;} # deletes share copy if it made it over in good shape } close(ADMIN); } # let's find the Scans next. opendir(BASE, $inbase) or die "can't open $inbase\n"; while ($file = readdir(BASE)){ $path = $inbase.$file."/"; if ($file =~ /^\./){ next;} elsif ($file =~ /Transcripts/i || $file =~ /Scans/i){ push (@scansdirs, $path); } } close(BASE); $mddir = $inbase."Metadata/"; $outmd = $outbase."Metadata/"; if (-e $mddir){ open (MD, $mddir) or die "can't open METADATA DIRECTORY!\n\n"; undef $found; while ($file = readdir(MD)){ if ($file =~ /\.xls/ || $file =~ /^\./){next;} if ($file =~ /$collnum.*?\.txt/){$found = 1;} $old = $mddir.$file; $new = $outmd.$file; copy ($old, $new); $val = `diff $old $new`; if ((-e $new) && (! $val)){ unlink $old;} # deletes share copy if it made it over in good shape } close(MD); } print "We have picked up the Metadata and Admin directory content\n(but not the MODS) for the archive;\n\n"; print "Now we're going to start picking up content \nfrom the scans and transcripts (if there are any).\n\n"; print "When completed, there will be a statement to that effect in the \n$out file.\n\n"; print "Please check back in a few hours!\n\n"; unless (fork){ # the child process does this stuff # we have all scans & transcript directories in @scansdirs foreach $dir (@scansdirs){ opendir(DIR, $dir) or die "can't open $dir\n"; while ($file = readdir(DIR)){ if ($file =~ /^\./ || $file =~ /thumbs\.db/i){ next;} $path = $dir.$file; ($newdir = $dir) =~ s,$inbase,$outbase,; # print "move $path to $new\n"; if (! -e $newdir){ `mkdir -m 0755 $newdir`;} if (-d $path){ push (@scansdirs, $path."/"); next; } $new = $newdir.$file; # print "move $path to $new\n"; copy ($path, $new); $val = `diff $path $new`; if ((-e $new) && (! $val)){ unlink $path;} # deletes share copy if it made it over in good shape } close(DIR); } push (@scansdirs, @scansdirs); #go through them twice foreach $dir (@scansdirs){ # go through the directory again. If nothing there, unlink it. undef $found; opendir(DIR, $dir) or next; #may have already done this one while ($file = readdir(DIR)){ if ($file =~ /^\./ || $file =~ /thumbs\.db/i){ next;} else {$found = 1;} } close(DIR); if (! $found){ `rm -r $dir`; } } print OUT "Transfer of $collnum to the storage deposits directory is complete.\n"; print OUT "Please check directories on the share drive for anything that did NOT get moved.\n"; print OUT "\n\nGREAT WORK!!! :-)\n\n"; close(OUT); exit; } exit; sub timestamp{ #following for Windows #print "hit enter twice please\n"; #$date = `date`; #$time = `time`; #print $date."\n"; #if ($date =~ /.*? (\d*)\/(\d*)\/(\d*)/){ # $date = $3.$1.$2; # } #print $date."\n"; #if ($time =~ /.*? (\d*)\:(\d*)\:(\d*)\./){ # $time = $1.$2.$3; # } #print $time."\n"; #$timestamp = $date."T".$time; #print $timestamp."\n"; # following for unix ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = gmtime(); ##$year += 1900; $mon ++; if ($mon < 10){ $mon="0".$mon;} #need 2 digits if ($sec < 10){ $sec="0".$sec;} if ($min < 10){ $min="0".$min;} if ($hour < 10){ $hour="0".$hour;} if ($mday < 10){ $mday="0".$mday;} $year = $year + 1900; $timestamp= "$year-$mon-$mday\T$hour:$min:$sec\Z"; } sub by_number {$a <=> $b;} sub cleanup{ $line = @_[0]; if ($line =~ /\377\376/){ die "$collfile is in UTF-16! Cannot process\nPlease repair and start over.\n\n"; #,,; # byte order mark } if ($line =~ /(<\?xml version.*)$/){ $xml = 1; $line = $1;} # remove byte order mark elsif ($line =~ //){ $parentstart = 1;} elsif ($line =~ /<\/collInfo>/){ $parentend = 1;} $line =~ s,\r,,g; # no Windows newlines $line =~ s,\. \"(\s),\.\"$1,g; # no space between period and quote # try to repair MS word encodings of hyphens, quotes, apostrophes $line =~ s,\342\200\231,',g; # if you hexdump the file, in place of an apostrophe # you will see in the word line: 342 200 231 # hexdump -cox filename > output # gives octal, hex, and characters # or you can just hexdump -c and look for those goofy things $line =~ s,\342\200\230,',g; $line =~ s,\342\200\235,",g; $line =~ s,\342\200\234,",g; $line =~ s,\342\200\233,\-\-,g; $line =~ s,\342\200\224,\-\-,g; $line =~ s,\342\200\223,\-\-,g; $line =~ s,\342\200\246,\-,g; $line =~ s,\357\277\275,\',g; $line =~ s,\222,\',g; # shows up as <92> $line =~ s,\226,\-,g; # shows up as <96> # $line =~ s,> *"(.*)" *<,>\1<,; # try to remove extraneous quotes $line =~ s, \& , \&\; ,g; #encode ampersand $line =~ s, +, ,g; # get rid of multiple spaces, added to keep lines from running together. return $line; }