#!/usr/bin/env perl
# Perl Homework 3
#
# Prompts for an RCS user id, then for every .htm/.html file in that user's
# ~/public_html directory:
#   * prints each <a ...>text</a> link that has an href as "href ==> text";
#   * writes a copy of the file, under the same name, into the current
#     directory with "Rensselaer University" and bare "Rensselaer" replaced
#     by "RPI" ("Rensselaer Polytechnic Institute" is left untouched).

use strict;
use warnings;    # instead of "-w" on the shebang line, which env(1) may not pass on

# extract_links($html)
#
# Scans $html for anchor elements and returns a list of [href, text] array
# refs, in document order. Anchors without an href attribute are skipped.
# Newlines inside either value are turned into spaces for readable output.
#
# NOTE(review): the opening-tag half of this pattern was garbled in the copy
# of the script under review; it is reconstructed here from the surviving
# comments and flags (/xigs) - confirm against the original assignment.
sub extract_links {
    my ($html) = @_;
    my @links;

    while ($html =~ m{
            <\s*a\s+      # opening <a and the whitespace before its attributes
            (.*?)         # attribute text; reduced to the href value below
            >             # ends at a >
            (.*?)         # the link reference
            <\s*/a\s*>    # closing tag
        }xigs)            # allow whitespace, case insensitive, global,
                          # treat as single line
    {
        my ($attrs, $text) = ($1, $2);

        # Search for href= (with any spaces) and keep its value. A quoted
        # value is taken whole so embedded spaces survive; an unquoted value
        # runs to the next whitespace.
        next unless $attrs =~ /href\s*=\s*("[^"]*"|'[^']*'|\S+)/i;
        my $href = $1;

        # Get rid of quotes.
        $href =~ s/^["']|["']$//g;

        # Change newlines to spaces, to make the report more readable.
        $href =~ s/\n/ /g;
        $text =~ s/\n/ /g;

        push @links, [ $href, $text ];
    }

    return @links;
}

# abbreviate_school_name($html)
#
# Returns a copy of $html with the school name abbreviated to "RPI".
#
# A single pattern such as
#   s/Rensselaer(\s+University)?(?!\s+Polytechnic\s+Institute)/RPI/gi
# is more compact, but it would not replace the phrase
# "Rensselaer University Polytechnic Institute", which a strict reading of
# the homework description requires. Hence the two passes below.
sub abbreviate_school_name {
    my ($html) = @_;

    $html =~ s/Rensselaer\s+University/RPI/gi;
    $html =~ s/Rensselaer(?!\s+Polytechnic\s+Institute)/RPI/gi;

    return $html;
}

# Prompt the user for an RCS id.
print "Please enter the id of an RCS user\n";
my $rcs = <STDIN>;
die "No RCS id given!\n" unless defined $rcs;    # EOF on STDIN
chomp $rcs;

# Only plain account names are passed to glob(): an empty id would expand to
# the caller's own home directory, and whitespace or wildcard characters
# would be interpreted by glob() rather than treated as part of the name.
die "No such user $rcs!\n" unless $rcs =~ /\A[\w.-]+\z/;

my ($home) = glob("~$rcs");
if (!defined $home || $home eq "~$rcs") {    # glob() failed to expand ~user
    die "No such user $rcs!\n";
}
my $dir = "$home/public_html";

# Open the given directory and collect the HTML files in it.
opendir my $dh, $dir or die "Cannot open directory $dir: $!\n";
my @files = grep { /\.html?\z/i && -f "$dir/$_" } readdir $dh;
closedir $dh;

# Go through all the files in the directory.
for my $file (@files) {

    # Open the file if we can; otherwise report it and move on.
    my $in;
    if (!open $in, '<', "$dir/$file") {
        print "Could not open file $file: $!\n";
        next;
    }

    print "-" x 20, "\n";
    print "Links in $file:\n";
    print "-" x 20, "\n";

    # Read the file into one big scalar.
    my $html = do { local $/; <$in> };
    close $in;
    $html = '' unless defined $html;

    # Create the local copy only after the input has been read, so running
    # the script from inside the source directory cannot truncate a file
    # before its contents have been slurped.
    open my $out, '>', $file
        or die "Cannot create file in current directory: $!\n";

    # Print link and reference.
    for my $link (extract_links($html)) {
        my ($href, $text) = @$link;
        print "\t$href ==> $text\n";
    }

    # Make substitutions, print to the local copy, close the local copy.
    print {$out} abbreviate_school_name($html);
    close $out
        or die "Cannot write file $file in current directory: $!\n";
}