#!/usr/bin/perl
# splitExcel
#
# Pulls the top line off the txt file specified, and adds it onto the beginning
# of all the files it creates out of the remainder of lines.
#
# All other lines are judged based on the collection number portion of the item
# number (a lowercase letter, four digits, an underscore, and seven digits, for
# example "u0001_2009001").  All lines for each collection are gathered, and the
# collection number is used to name the output file, which will be collnum.txt.
#
# NOTE(review): the item number is expected in the 2nd tab column, but the code
# does not split on tabs -- it takes the first substring of the line that looks
# like a collection number, wherever it occurs.  Confirm that no other column
# can contain a matching value.
#
# Usage:
#   splitExcel [input_file [output_dir]]
#
# Both arguments are optional; when omitted, the hard-coded defaults below are
# used, so running the script with no arguments behaves as it always has.
#
## Copyright (c) 2010, The University of Alabama Libraries.
## Contributed by Jody DeRidder, 7/30/10.
## All rights reserved.
## Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
##   * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
##   * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
##     the documentation and/or other materials provided with the distribution.
##   * Neither the name of The University of Alabama Libraries nor the names of its contributors may be used to endorse or promote products
##     derived from this software without specific prior written permission.
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
## THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
## LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

use strict;
use warnings;
use File::Spec;

# Default input file, used when no input_file argument is given.
my $default_input = "/srv/deposits/hold/WadeHallCivilWar_remediated.txt";
# Alternative input used in the past:
#   /srv/deposits/hold/u0001_2009001_thru_2009037.txt

# Default output directory (the ./splits directory), used when no output_dir
# argument is given.
my $default_output_dir = "/srv/scripts/metadata/splits/";

my $doMe = @ARGV > 0 ? $ARGV[0] : $default_input;
my $base = @ARGV > 1 ? $ARGV[1] : $default_output_dir;

# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and the handle cannot collide with other globals.
open(my $in, '<', $doMe) or die "can't read $doMe: $!\n";

my $first;        # header line, copied to the top of every output file
my %collLists;    # collection number => reference to array of its lines

while (my $line = <$in>) {
    if (!defined $first) {
        # The very first line is always the header, whatever it contains.
        $first = $line;
    }
    elsif ($line =~ /([a-z]\d{4}_\d{7})/) {
        # $1 is the collection number; group the untouched line under it.
        push @{ $collLists{$1} }, $line;
    }
    else {
        print "no match! $line\n";
    }
}
close($in);

# Sorted only so that the files are written in a predictable order; the
# contents of each file do not depend on it.
foreach my $coll (sort keys %collLists) {
    my $mine = File::Spec->catfile($base, $coll . ".txt");

    open(my $out, '>', $mine) or die "can't write to $mine: $!\n";
    print {$out} $first, @{ $collLists{$coll} }
        or die "can't write to $mine: $!\n";
    # Buffered write errors (e.g. a full disk) only surface at close.
    close($out) or die "can't finish writing $mine: $!\n";
}