use strict;
use warnings;

# Backlink generator for a directory of numbered blog posts (0*.html).
#
# For every post, the body (between the title and the "posted" footer) is
# scanned for links to other numbered posts.  Each post gets an empty
# <name>.backlinks file; every post that is linked to then gets its
# .backlinks file filled with an HTML list of the posts linking to it.

my %linkers; # key is link target; value is array of link sources

# Entry point: scans all 0*.html files in the current directory, records
# who links to whom, and writes one .backlinks file per post / link target.
sub main {
    my @files = glob('0*.html');

    for my $file (@files) {
        my @linked_files = extract_linkees(
            strip_header_and_footer(
                slurp_file($file)));

        for my $linked_file (@linked_files) {
            record_backlink($file, $linked_file);
        }

        # Every post gets a (possibly empty) backlinks file, so that a post
        # nobody links to does not keep a stale file from an earlier run.
        generate_empty_file(get_backlinks_file_name($file));
    }

    for my $linkee (keys %linkers) {
        generate_backlinks_file($linkee, $linkers{$linkee});
    }

    return;
}

# Reads $filename and returns its lines joined by single spaces (newlines
# removed).  Returns "" when the file cannot be opened; callers rely on the
# empty string as the "unreadable" signal, so no warning is issued here.
sub slurp_file {
    my ($filename) = @_;

    open my $fh, '<', $filename or return "";
    my @lines = <$fh>;
    close $fh;

    chomp @lines;
    return join(" ", @lines);
}

# Returns $content with everything up to (and including) the last
# class="title" attribute and everything from the first class="posted"
# attribute onwards removed, leaving only the post body.
sub strip_header_and_footer {
    my ($content) = @_;

    $content =~ s/.*class="title"//;
    $content =~ s/class="posted".*//;

    return $content;
}

# Returns the list of numbered post file names (e.g. "0042.html") that
# $content links to via href="..." attributes, in order of appearance.
# Duplicates are not removed here.
sub extract_linkees {
    my ($content) = @_;
    return $content =~ /href="(0[0-9]+\.html)"/g;
}

# Records in %linkers that $linker links to $linkee.  Each linker is stored
# at most once per linkee.
sub record_backlink {
    my ($linker, $linkee) = @_;

    my $sources = $linkers{$linkee} ||= [];

    # Plain string comparison: file names must not be treated as regexes
    # (a "." or "+" in the name would otherwise act as a metacharacter).
    my $linker_already_in_list = grep { $_ eq $linker } @$sources;
    push @$sources, $linker unless $linker_already_in_list;

    return;
}

# Returns the text of the <title> element of $file.  Falls back to the file
# name itself when the file is unreadable/empty or has no <title> element.
sub get_post_title {
    my ($file) = @_;

    my $content = slurp_file($file) or return $file;

    # Non-greedy so that only the title text is captured, not the markup
    # that precedes it in the document head.
    my ($title) = $content =~ m{<title>(.*?)</title>}i;

    return defined $title ? $title : $file;
}

# Returns the date found after "Posted on " in $file (e.g.
# "January 2, 2020").  Returns the file name when the file is
# unreadable/empty, and undef when no date is present.
sub get_post_date {
    my ($file) = @_;

    my $content = slurp_file($file) or return $file;
    my ($date) = $content =~ /Posted on (\w+\s+\d+,\s+\d+)/;

    return $date;
}

# Maps a post file name to its backlinks file name:
# "0042.html" -> "0042.backlinks".
sub get_backlinks_file_name {
    my ($file) = @_;

    $file =~ s/\.html$//;
    $file .= ".backlinks";

    return $file;
}

# Creates $file as an empty file, truncating it if it already exists.
# Failure is reported with a warning but is not fatal.
sub generate_empty_file {
    my ($file) = @_;

    open my $fh, '>', $file or do {
        warn "cannot create $file: $!\n";
        return;
    };
    close $fh or warn "cannot close $file: $!\n";

    return;
}

# Writes the backlinks file for $linkee: an HTML fragment listing every post
# in the array reference $linkers as a link labelled with that post's title.
# Failure to write is reported with a warning but is not fatal.
sub generate_backlinks_file {
    my ($linkee, $linkers) = @_;

    my $backlinks_file = get_backlinks_file_name($linkee);
    open my $out, '>', $backlinks_file or do {
        warn "cannot write $backlinks_file: $!\n";
        return;
    };

    my $linkee_title = get_post_title($linkee);
    print {$out} "<i>Followups to $linkee_title:<br>\n";
    print {$out} "<ul>\n";

    for my $linker (@$linkers) {
        my $linker_title = get_post_title($linker);
        print {$out} "<li><a href='$linker'>$linker_title</a>\n";
    }

    print {$out} "</ul></i>\n";

    # Buffered write errors only surface at close time.
    close $out or warn "cannot close $backlinks_file: $!\n";

    return;
}

main();