# An obsession with data (normalization)

=> .. Up a Level

```
# Go through all the files in the combined and pull out the first column
# (the filename).
while (<STDIN>) {
    # Clean up the line.
    chomp;

    # Pull out the first column.
    my @p = split(/\t/);
    my $f = $p[0];

    # Test to see if the file exists.
    if (! -f $f) {
        # The file doesn't exist, so complain.
        print STDERR "Can't find $f\n";
    }
}
```

```
## Keep track of files we've already seen.
%seen = ();

## Go through the input.
while (<STDIN>) {
    # Clean up the input line.
    chomp;

    # Split out the columns.
    my @p = split(/\t/);

    # If we already saw it, tell the user we skipped it. If not, then
    # print it out and add it to the hash so we don't print it again.
    if (exists $seen{$p[0]}) {
        print STDERR "Skipping $p[0]\n";
    } else {
        print "$_\n";
        $seen{$p[0]} = 1;
    }
}
```

> In Miwāfu, they call those who cannot use magic `miw: bachir?ma`. Translated into Lorban, it means “cursed to be forever mundane.” – Awakened Magic, Dastor Malink

$

```
##
## Setup
##

## Directives
use strict;
use warnings;

##
## Parameters
##

## The first parameter is the directory.
my $dir = $ARGV[0];
die "USAGE: dir input" unless -d $dir;

my $input = $ARGV[1];
die "USAGE: dir input" unless -f $input;

##
## Slurp up the contents of the file.
##

my %files = ();

open FILES, "<$input" or die "Cannot open input $input ($!)";

while (<FILES>) {
    chomp;
    my @p = split(/\t/);
    $files{$p[0]} = $p[1];
}

close FILES;

##
## Get a list of all the files in the directory.
##

open PIPE, "find '$dir' -type f |" or die "Cannot open pipe ($!)";

while (<PIPE>) {
    # Clean up the line.
    chomp;

    # Ignore . files.
    next if m@/\.@;
    next unless m@\.txt$@;

    # Trim off the leading characters.
    s@\./@@;

    # See if the file exists.
    my $file = $_;

    if (exists $files{$file}) {
        # We found the file.
        my $date = $files{$file};
        print "HIT $date $file\n";

        # Pull out the entries so we can report what was missing after
        # we're done processing.
        delete $files{$file};

        # Open up the file and read in the metadata section, looking
        # for an already existing date.
        my $found_date = 0;

        open FILE, "<$file" or die "Cannot open $file ($!)";

        while (<FILE>) {
            # Clean up the line.
            chomp;

            if (m@^\* Date:\s*(.*?)$@) {
                my $old_date = $1;
                $found_date = 1;

                if ($date eq $old_date) {
                    # Nothing to do, we're good.
                    last;
                }

                print " $1\n";
            }
        }

        close FILE;

        # If we didn't find the date, we need to add it into the file.
        unless ($found_date) {
            my $need_date = 1;
            print " Date: $date\n";

            open IN, "<$file" or die "Cannot open $file ($!)";
            open OUTPUT, ">tmp" or die "Cannot open tmp ($!)";

            while (<IN>) {
                print OUTPUT $_;

                if ($need_date && $_ =~ /^= /) {
                    print OUTPUT "* Date: $date\n";
                    $need_date = 0;
                }
            }

            close IN;
            close OUTPUT;

            rename($file, "$file.bak");
            rename("tmp", $file);
        }
    } else {
        # Print the file.
        #print "SKIP $_\n";
    }
}

close PIPE;

## Write out any remaining files.
foreach my $file (sort(keys(%files))) {
    print "MISS $file\n";
}
```

# Metadata

Categories:

=> /categories/programming/ Programming
=> /categories/writing/ Writing

Tags:

=> /tags/perl/ Perl

# Footer

Below are various useful links within this site and to related sites (not all have been converted over to Gemini).

=> /now/ Now
=> /contact/ Contact
=> /bio/ Biography
=> /bibliography/ Bibliography
=> /support/ Support
=> /fiction/ Fiction
=> //fedran.com/ Fedran
=> https://mfgames.com/ Coding
=> https://moonfire.us/ The Moonfires
=> /categories/ Categories
=> /tags/ Tags
=> /privacy/ Privacy
=> /colophon/ Colophon
=> /license/ License
=> https://lists.typewriter.press/subscription?f=RDQ6f3AFHXnX2o763d5TgUmaYP7N763gR6FjZyGUUFWhyRkpgZF9I35ySICDBEdFFtgG Mailing List

=> https://d.moonfire.us/blog/2012/04/29/an-obsession-with-data-normalization/