#!/usr/bin/perl
use strict;
use warnings;

use DBI;
use File::Copy qw(copy);
use POSIX qw(strftime);

# generateMods.pl
# this version to be run from jeremiah's home directory, on Cabaniss
# working over jpegs all in the same directory, instead of tiffs
# checks first for MODS in web directory.
# if there, will not create MODS for that item
#
# expects filenames that reflect the EAD id
# this script parses box number, folder number
# pulls out filename ids of items
# looks for number of pages belonging to this item
# looks up PURL in database
#   if not there, outputs error
# takes MODS template, and substitutes in values for this item
#   Item number (for title), box, folder, extent, PURL
# outputs a MODS for this item into MODS directory, named for the
#   item ID
# puts copies for archiving into deposits directory

##Copyright (c) 2009, The University of Alabama Libraries.
## Contributed by Jody DeRidder, 12/11/09.
##All rights reserved.
##Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
##    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
##    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the
##      distribution.
##    * Neither the name of The University of Alabama Libraries nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
##THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
##THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
##CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
##PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
##LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
##EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# jody DeRidder, 12/11/09

# ---------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------
my $modsDir   = "../MODS/";
my $content   = "../jpegs/";
my $template  = "./MODStemplate.xml";
my $timestamp = gettime();

# The DATE_HERE placeholder receives just the YYYY-MM-DD part of the stamp.
my $date   = substr($timestamp, 0, 10);
my $output = "../output/MODS_README_$timestamp";
open(my $out_fh, '>', $output) or die "can't open $output\n";

# values to replace in template include:
# TITLE_HERE, EXTENT_HERE, PURL_HERE, FILEID_HERE, BOX_HERE, FOLDER_HERE
# and of course, the filename.

my $coll       = "u0003_0000252";
my $modsCopies = "/srv/deposits/Cabaniss/u0003_0000252_Cabaniss/Metadata/";
my $webdirs    = "/srv/www/htdocs/content/u0003/0000252/";

# ---------------------------------------------------------------------
# Collect the ids of items that already have a MODS file on the web side.
# A work queue is used instead of growing an array while foreach-ing it.
# ---------------------------------------------------------------------
my %have_mods;
my @pending_dirs = ($webdirs);
while (defined(my $dir = shift @pending_dirs)) {
    opendir(my $dh, $dir) or die "can't open $dir\n";
    while (defined(my $entry = readdir $dh)) {
        next if $entry =~ /^\./;    # skip dot files
        my $path = $dir . $entry;
        if (-d $path) {
            push @pending_dirs, $path . "/";
        }
        elsif ($entry =~ /^(.*?)\.mods\.xml/) {
            $have_mods{$1} = 1;
        }
    }
    closedir($dh);
}
# now we have all ids for existing MODS in %have_mods

# ---------------------------------------------------------------------
# Database connection
# NOTE(review): credentials are hard-coded here; consider moving them to
# a protected config file.
# ---------------------------------------------------------------------
my $hostname = "localhost";
my $port     = "3306";
my $user     = "addData";
my $password = "moreStuff";
my $database = "InfoTrack";

# The attributes are passed at connect time.  The previous code assigned
# them to an unrelated variable ($h), so they never took effect; the
# values below match what was actually in force and what the explicit
# "or die" / "warn" checks further down rely on.
my $dbh = DBI->connect(
    "DBI:mysql:$database:$hostname:$port",
    $user, $password,
    { PrintError => 1, RaiseError => 0 },
) or die "can't connect to database: ", $DBI::errstr, "\n";

# ---------------------------------------------------------------------
# first, get the template
# ---------------------------------------------------------------------
open(my $tmpl_fh, '<', $template) or die "can't read in $template\n";
my @template = <$tmpl_fh>;
close($tmpl_fh);

# ---------------------------------------------------------------------
# Scan the jpeg directory.  Only the 2048-pixel derivative of each page is
# counted; every such file adds one page to its item.
# this does NOT process subpages!
# ---------------------------------------------------------------------
my $item_re       = qr/^\Q$coll\E_(\d{2})(\d{2})(\d{3})(?:_\d{4})?_2048\.jpg$/;
my $derivative_re = qr/_512\.jpg$|_128\.jpg$|(?i:thumbs\.db)/;

my @todo;         # ids that need a new MODS, in discovery order
my @errors;       # messages destined for the README in the output dir
my %extent_of;    # item id => number of page images found
my %folder_of;    # item id => two-digit folder number (as in filename)
my %box_of;       # item id => two-digit box number (as in filename)
my %title_of;     # item id => "Item N"

opendir(my $jpeg_dh, $content) or die "can't look through $content\n";
while (defined(my $file = readdir $jpeg_dh)) {
    next if $file =~ /^\./;    # skip dot files

    if (my ($box, $folder, $sequence) = $file =~ $item_re) {
        my $item = $coll . "_" . $box . $folder . $sequence;

        if (exists $extent_of{$item}) {
            $extent_of{$item} += 1;    # another page of an item on the list
        }
        elsif (!$have_mods{$item}) {
            push @todo, $item;
            $extent_of{$item} = 1;
            $folder_of{$item} = $folder;
            $box_of{$item}    = $box;
            $title_of{$item}  = "Item " . ($sequence + 0);    # drop padding zeros
        }
    }
    elsif ($file !~ $derivative_re) {
        push @errors, "ERROR! Check filename. DID NOT PROCESS: $file\n";
    }
}
closedir($jpeg_dh);

my $num = scalar @todo;
if ($num < 1) {
    print "Did not find any new items to link!! Exiting.\n";

    # Do not lose filename errors collected during the scan.
    if (@errors) {
        print "Check the README file in the output directory for errors, please!\n";
        print {$out_fh} @errors;
    }
    close($out_fh) or warn "problem closing $output: $!\n";
    $dbh->disconnect();
    exit;
}
print "Found $num new items to link. Processing now\n";

my $digFiles = 0;
$digFiles += $_ for values %extent_of;
print {$out_fh} "\nFound $num items for new MODS, consisting of $digFiles digital files\n";

# ---------------------------------------------------------------------
# Generate one MODS file per item that has a PURL in the database.
# ---------------------------------------------------------------------
foreach my $item (sort by_number keys %title_of) {

    # check first to see if we have a purl yet for this item
    my $id2009 = $dbh->quote($item);    # quoted form is used in messages only
    my $sth    = $dbh->prepare("select purlnum, realurl from lookup where id_2009 = ?")
        or die "can't look for $id2009 in database: ", $dbh->errstr(), "\n";
    $sth->execute($item)
        or die "Can't look for $id2009 in lookup : ", $sth->errstr(), "\n";
    my ($purlnum) = $sth->fetchrow_array();
    warn "Problem in fetchrow_array(): ", $sth->errstr(), "\n" if $sth->err();
    $sth->finish();

    if (!$purlnum) {
        push @errors, "ERROR: $id2009 has NO PURL! Did NOT make MODS\n";
        next;
    }

    my $url = "http://purl.lib.ua.edu/" . $purlnum;

    # Placeholders are tried in this order; only the first one found on a
    # template line is replaced.
    my @substitutions = (
        [ TITLE_HERE  => $title_of{$item} ],
        [ EXTENT_HERE => $extent_of{$item} . " p." ],
        [ PURL_HERE   => $url ],
        [ FILEID_HERE => $item ],
        [ BOX_HERE    => $box_of{$item} ],
        [ FOLDER_HERE => $folder_of{$item} ],
        [ DATE_HERE   => $date ],
    );

    my $mymods = $modsDir . $item . ".mods.xml";
    open(my $mods_fh, '>', $mymods) or die "can't write to $mymods\n";

  LINE:
    foreach my $line (@template) {
        foreach my $pair (@substitutions) {
            my ($token, $value) = @{$pair};
            if (my ($before, $after) = $line =~ /^(.*?)\Q$token\E(.*)/) {
                print {$mods_fh} $before . $value . $after . "\n";
                next LINE;
            }
        }

        # The generation timestamp is inserted right after this marker text.
        if (my ($before, $after)
            = $line =~ /^(.*? PRELIMINARY METADATA GENERATED BY SCRIPT )(.*)/)
        {
            print {$mods_fh} $before . $timestamp . " $after\n";
            next LINE;
        }

        print {$mods_fh} $line;
    }
    close($mods_fh) or die "can't finish writing $mymods: $!\n";

    # keep an archival copy in the deposits directory
    my $copy = $modsCopies . $item . ".mods.xml";
    copy($mymods, $copy)
        or push @errors, "WARNING: Could not copy $mymods to $copy\n";
}

$dbh->disconnect();

if (@errors) {
    print "All done. Check the README file in the output directory for errors, please!\n";
    print {$out_fh} @errors;
}
else {
    print "All done! Please check MODS directory for results. Thank you!\n";
}
close($out_fh) or warn "problem closing $output: $!\n";
sleep(3);

# Sort comparator for item ids.  Compares the trailing run of digits
# numerically, then falls back to plain string order.  (A bare <=> on the
# full id treats every id as 0 because the ids start with a letter.)
sub by_number {
    my ($left)  = $a =~ /(\d+)$/;
    my ($right) = $b =~ /(\d+)$/;
    $left  = 0 unless defined $left;
    $right = 0 unless defined $right;
    return $left <=> $right || $a cmp $b;
}

# Returns the current time formatted as YYYY-MM-DDTHH:MM:SSZ.
# NOTE(review): the value comes from localtime() although the trailing
# "Z" conventionally denotes UTC; kept as-is so existing output names and
# stamps do not change -- confirm which was intended.
sub gettime {
    return strftime('%Y-%m-%dT%H:%M:%SZ', localtime());
}