#!/usr/bin/perl
# generateMods.pl
#
# This version is to be run from jeremiah's home directory, on Cabaniss,
# working over jpegs all in the same directory, instead of tiffs.
#
# What the script does:
#   * checks first for MODS in the web directory; if one is there, it will
#     not create MODS for that item
#   * expects filenames that reflect the EAD id, and parses the box number,
#     folder number and item id out of them
#   * counts the number of pages (2048px jpegs) belonging to each item
#   * looks up the item's PURL in the database; if it is not there, an
#     error is recorded and no MODS is made for that item
#   * takes the MODS template and substitutes in the values for this item:
#     item number (for title), box, folder, extent, PURL
#   * outputs a MODS for the item into the MODS directory, named for the
#     item ID
#   * puts a copy for archiving into the deposits directory
#
# Values replaced in the template:
#   TITLE_HERE, EXTENT_HERE, PURL_HERE, FILEID_HERE, BOX_HERE, FOLDER_HERE,
#   DATE_HERE, and of course the filename.
#
# jody DeRidder, 12/11/09

use strict;
use warnings;

use DBI;
use File::Copy qw(copy);
use Scalar::Util qw(looks_like_number);

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
my $modsDir  = "../MODS/";
my $content  = "../jpegs/";
my $template = "./MODStemplate.xml";

# gettime() fills in $date (YYYY-MM-DD) as a side effect; it is used for the
# DATE_HERE placeholder further down.
my $date;
my $timestamp = gettime();

my $output = "../output/MODS_README_$timestamp";
open(my $readme, '>', $output) or die "can't open $output\n";

my $coll       = "u0003_0000252";
my $modsCopies = "/srv/deposits/Cabaniss/u0003_0000252_Cabaniss/Metadata/";
my $webdirs    = "/srv/www/htdocs/content/u0003/0000252/";

# Marker line in the template that gets the run timestamp appended to it.
my $SCRIPT_MARKER = " PRELIMINARY METADATA GENERATED BY SCRIPT ";

# ---------------------------------------------------------------------------
# Collect the ids of every MODS file that already exists under the web
# directory (searched recursively, breadth first).
# ---------------------------------------------------------------------------
my %have_mods;
my @pending_dirs = ($webdirs);
while (defined(my $dir = shift @pending_dirs)) {
    opendir(my $dh, $dir) or die "can't open $dir\n";
    while (defined(my $entry = readdir $dh)) {
        next if $entry =~ /^\./;    # skip dot files

        my $path = $dir . $entry;
        if (-d $path) {
            push @pending_dirs, $path . "/";
        }
        elsif ($entry =~ /^(.*?)\.mods\.xml/) {
            $have_mods{$1} = 1;
        }
    }
    closedir $dh;
}

# ---------------------------------------------------------------------------
# Database login
# NOTE(review): credentials are hard-coded in the script; consider moving
# them to a protected config file or the environment.
# ---------------------------------------------------------------------------
my $hostname = "localhost";
my $port     = "3306";
my $user     = "addData";
my $password = "moreStuff";
my $database = "InfoTrack";

# The original assigned PrintError/RaiseError to an unrelated, undeclared
# variable, so they never applied to this handle.  The settings that were
# really in force are made explicit here; every DBI call below checks its
# own result.
my $dbh = DBI->connect(
    "DBI:mysql:$database:$hostname:$port",
    $user, $password,
    { PrintError => 1, RaiseError => 0 },
) or die "can't connect to database: ", $DBI::errstr, "\n";

# ---------------------------------------------------------------------------
# Read the MODS template
# ---------------------------------------------------------------------------
open(my $template_fh, '<', $template) or die "can't read in $template\n";
my @template_lines = <$template_fh>;
close $template_fh;

# ---------------------------------------------------------------------------
# Scan the jpeg directory.  Only the largest derivative (_2048) is counted;
# each one is a page of its item.  This does NOT process subpages!
# ---------------------------------------------------------------------------
my (%ItemExtent, %ItemFolder, %ItemBox, %ItemTitle);
my @errors;

# <coll>_BBFFIII[_PPPP]_2048.jpg : box, folder, item sequence, optional page
my $page_image_re =
    qr/^\Q$coll\E_(\d{2})(\d{2})(\d{3})(?:_\d{4})?_2048\.jpg$/;

opendir(my $jpeg_dh, $content) or die "can't look through $content\n";
while (defined(my $file = readdir $jpeg_dh)) {
    next if $file =~ /^\./;    # skip dot files

    if ($file =~ $page_image_re) {
        my ($box, $folder, $sequence) = ($1, $2, $3);
        my $item = $coll . "_" . $box . $folder . $sequence;

        if (exists $ItemExtent{$item}) {
            $ItemExtent{$item} += 1;    # another page of an item on the list
        }
        elsif (!$have_mods{$item}) {
            # No MODS yet: put the item on the todo list.
            $ItemExtent{$item} = 1;
            $ItemFolder{$item} = $folder;
            $ItemBox{$item}    = $box;
            $ItemTitle{$item}  = "Item " . ($sequence + 0);  # drop zero padding
        }
    }
    elsif ($file !~ /_(?:512|128)\.jpg$/ && $file !~ /thumbs\.db/i) {
        # Smaller derivatives and thumbs.db are expected; anything else is
        # a filename that needs a human to look at it.
        push @errors, "ERROR! Check filename. DID NOT PROCESS: $file\n";
    }
}
closedir $jpeg_dh;

my @allitems = sort by_number keys %ItemTitle;
my $num      = scalar @allitems;

if ($num < 1) {
    print "Did not find any new items to link!! Exiting.\n";
    if (@errors) {
        # Do not lose the filename problems just because nothing was new.
        print {$readme} @errors;
        print "Check the README file in the output directory for errors, please!\n";
    }
    close $readme;
    $dbh->disconnect();
    exit;
}
print "Found $num new items to link. Processing now\n";

my $digFiles = 0;
$digFiles += $_ for values %ItemExtent;
print {$readme}
    "\nFound $num items for new MODS, consisting of $digFiles digital files\n";

# ---------------------------------------------------------------------------
# For each new item: look up its PURL, fill in the template, write the MODS,
# and copy it to the deposits directory.
# ---------------------------------------------------------------------------
my $lookup = $dbh->prepare(
    "select purlnum, realurl from lookup where id_2009 = ?")
    or die "can't prepare PURL lookup in database: ", $dbh->errstr(), "\n";

ITEM:
foreach my $item (@allitems) {
    my $id2009 = $dbh->quote($item);    # quoted form, used in messages only

    $lookup->execute($item)
        or die "Can't look for $id2009 in lookup : ", $lookup->errstr(), "\n";
    my ($mypurlnum) = $lookup->fetchrow_array();
    warn "Problem in fetchrow_array(): ", $lookup->errstr(), "\n"
        if $lookup->err();
    $lookup->finish();

    if (!$mypurlnum) {
        push @errors, "ERROR: $id2009 has NO PURL! Did NOT make MODS\n";
        next ITEM;
    }

    my %value_for = (
        TITLE_HERE  => $ItemTitle{$item},
        EXTENT_HERE => $ItemExtent{$item} . " p.",
        PURL_HERE   => "http://purl.lib.ua.edu/" . $mypurlnum,
        FILEID_HERE => $item,
        BOX_HERE    => $ItemBox{$item},
        FOLDER_HERE => $ItemFolder{$item},
        DATE_HERE   => $date,
    );

    my $mymods = $modsDir . $item . ".mods.xml";
    open(my $mods_fh, '>', $mymods) or die "can't write to $mymods\n";
    print {$mods_fh}
        map { _fill_template_line($_, \%value_for, $timestamp) }
        @template_lines;
    if (!close $mods_fh) {
        # Buffered write errors surface at close; do not archive a
        # truncated file.
        push @errors, "ERROR: Could not finish writing $mymods: $!\n";
        next ITEM;
    }

    my $copy = $modsCopies . $item . ".mods.xml";
    copy($mymods, $copy)
        or push @errors, "WARNING: Could not copy $mymods to $copy\n";
}

$dbh->disconnect();

if (@errors) {
    print "All done. Check the README file in the output directory for errors, please!\n";
    print {$readme} @errors;
}
else {
    print "All done! Please check MODS directory for results. Thank you!\n";
}
close($readme) or warn "could not close $output: $!\n";
sleep(3);

# ---------------------------------------------------------------------------
# Subroutines
# ---------------------------------------------------------------------------

# _fill_template_line($line, \%value_for, $stamp)
# Returns a copy of one template line with every placeholder named in
# %value_for replaced by its value, and with "$stamp " inserted after the
# first "PRELIMINARY METADATA GENERATED BY SCRIPT" marker.  A line that was
# changed is always newline-terminated; untouched lines are returned as is.
sub _fill_template_line {
    my ($line, $value_for, $stamp) = @_;

    my $names   = join '|', map { quotemeta } sort keys %{$value_for};
    my $changed = 0;

    $changed = 1 if $line =~ s/($names)/$value_for->{$1}/g;
    $changed = 1 if $line =~ s/(\Q$SCRIPT_MARKER\E)/$1$stamp /;

    $line .= "\n" if $changed && $line !~ /\n\z/;
    return $line;
}

# Sort comparator.  Plain numbers compare numerically.  Anything else (item
# ids such as u0003_0000252_0101001) compares by its trailing run of digits,
# with a string comparison as tie-breaker; a bare <=> would see every such
# id as 0 and leave the list unsorted.
sub by_number {
    return $a <=> $b
        if looks_like_number($a) && looks_like_number($b);

    my ($tail_a) = $a =~ /(\d+)\z/;
    my ($tail_b) = $b =~ /(\d+)\z/;
    $tail_a = 0 unless defined $tail_a;
    $tail_b = 0 unless defined $tail_b;

    return ($tail_a <=> $tail_b) || ($a cmp $b);
}

# Returns the current time as "YYYY-MM-DDThh:mm:ssZ" and, as a side effect,
# sets the file-level $date to "YYYY-MM-DD".
# NOTE(review): the value comes from localtime() but carries a "Z" suffix,
# which normally denotes UTC -- confirm whether gmtime() was intended.  Left
# unchanged because the timestamp is part of the README file name.
sub gettime {
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime();

    $date = sprintf '%04d-%02d-%02d', $year + 1900, $mon + 1, $mday;
    return sprintf '%sT%02d:%02d:%02dZ', $date, $hour, $min, $sec;
}