#!/usr/bin/env perl
use strict;
use warnings;
use Carp;
use feature ":5.10";

die "Usage: $0 <file1> <file2> ...\n" unless @ARGV;

#Set up some regexp parts to improve readability
#Emails
my $user   = qr/[^\s\@]+/;  #username is anything except whitespace or '@' sign.
my $domain = qr/[^\s\@]+(?:\.[^\s\@.]+)+/;  #domain is anything except whitespace or '@' sign, but must have at least one period
#Phone numbers
my $acode  = qr/(?:\(\d{3}\)|\d{3})/;  #area code is three digits possibly surrounded by parens
my $pref   = qr/\d{3}/;  #prefix is three digits
my $exch   = qr/\d{4}/;  #exchange is four digits
#addresses
my $street = qr/\S+\s+(?:Rd|St|Cir|Blvd|Ct|Ln|Dr|Ave)/; #non-space, at least one space, one of the street types
my $city   = qr/\S+/;   #city is just non-whitespace
my $state  = qr/[A-Z]{2}/;  #state is two capitals
#dates
my $year   = qr/\d{4}/;  #year is four digits
my $month  = qr/\d{1,2}/;  #month is one or two digits
my $day    = qr/\d{1,2}/;  #so is day
#times
my $hour_12= qr/(?:\d|1[012])/;  #12 hour is a single digit, or two digits from 10 - 12
my $hour_24= qr/(?:0?\d|1\d|2[0-4])/;  #24 hour is a single digit, two digits starting with 1, or two digits from 20 to 24
my $minute = qr/[0-5]\d/; #minute is two digits from 00 to 59
my $second = qr/(?:[0-5]\d|6[01])/;  #seconds is two digits from 00 to 61 (to account for weirdo leap seconds
my $ampm   = qr/[ap]\.?m\.?/i;  #am/pm may be either case and may or may not have periods

for my $file (@ARGV) {
   open my $fh, '<', $file or die "Cannot open $file: $!";
   local $/ = ""; #enable paragraph mode
   say "File: $file";
   while (my $para = <$fh>) {
      say "Paragraph $.:";
      #Email:  user @ domain
      if (my @emails = $para =~ /$user  \@  $domain/gx) {
         say 'Emails: ', join(', ', sort by_domain_user @emails); 
      }
      #Phone: possible area code, prefix, exchange, possibly separated by spaces, hyphens, periods
      if (my @phones = $para =~ /(?:$acode  (?:\s+|-|\.)?)?  $pref  (?:\s+|-|\.)?  $exch/gx) {
         #convert newlines to spaces
         tr/\n/ / for @phones;
         say 'Phones: ', join(', ', sort by_code_prefix @phones);
      }
      #Address:  number, street, possibly city comma state
      if (my @addrs = $para =~ /\d+  \s+  $street  (?: \s+  $city, \s+  $state)?/gx) {
         #convert newlines to spaces
         tr/\n/ / for @addrs;
         say 'Addresses: ', join(', ', sort by_state_city_street @addrs);
      }
      #Date: year month day, or month day year, separated by hyphens or dashes
      if (my @dates = $para =~ m!(?:$year [-/] $month [-/] $day) | (?:$month [-/] $day [-/] $year)!gx) {
         say 'Dates: ', join(', ', sort by_year_month_day @dates);
      }
      #Time: 12-hour, minute, possible second, possible am/pm OR 24-hour, minute, psosible second 
      if (my @times = $para =~ /(?: $hour_12 : $minute (?: : $second)?) (?:$ampm)? | (?: $hour_24 : $minute (?: : $second)? )/gx) {
         say 'Times: ', join(', ', sort by_hour_minute_second @times);
      }
      print "\n";
   }
}


#pretty much all of these should be taking advantage of a 
#Schwartzian Transform to increase efficiency, but since
#the duedate for HW2 was before we covered
#references, I'm omitting them.



sub by_state_city_street {
   my ($num_a, $street_a, $city_a, $state_a) = 
      $a =~ /^(\d+)\s+($street)(?:\s+($city),\s+($state))?$/;
   my ($num_b, $street_b, $city_b, $state_b) = 
      $b =~ /^(\d+)\s+($street)(?:\s+($city),\s+($state))?$/;

   #take care of possible undefined cities and states
   $_ ||= '' for $state_a, $city_a, $state_b, $city_b;

   #order by state, fall back to city, fall back to street, fall back to number

   return(($state_a cmp $state_b) || ($city_a cmp $city_b) || ($street_a cmp $street_b) || ($num_a <=> $num_b));
}
      
sub by_domain_user {
   my ($a_usr, $a_dom) = split '@', $a;
   my ($b_usr, $b_dom) = split '@', $b; 

   return $a_dom cmp $b_dom or $a_usr cmp $b_usr
}

sub by_code_prefix {
   #going to strip all non-numerics from phone numbers for
   #easier sorting.  Need to make copies, since changes
   #to $a or $b would affect the actual array items
   my ($mod_a, $mod_b) = ($a, $b);

   #transliterate all non-numbers (using /c modifier for compliment)
   #to nothing (using the /d modifier to delete) for each
   #element of the list $mod_a, $mod_b
   tr/0-9//dc for $mod_a, $mod_b;

   #phone numbers without area codes come first
   return((length($mod_a) <=> length($mod_b)) || ($mod_a <=> $mod_b));
}

sub by_hour_minute_second {
   my ($h_a, $m_a, $s_a) = ($a =~ /\d+/g);
   my ($h_b, $m_b, $s_b) = ($b =~ /\d+/g);

   #If this is pm, add 12 to the hours
   if ($a =~ /p\.?m\.?$/i) {
      $h_a += 12;
   }
   if ($b =~ /p\.?m\.?$/i) {
      $h_b += 12;
   }

   #compare hours, fall back to minutes, fall back to seconds.
   #account for the fact that the seconds may be undefined.
   return(($h_a <=> $h_b) || ($m_a <=> $m_b) || (($s_a || 0) <=> ($s_b || 0)));
}

sub by_year_month_day {
   my ($y_a, $m_a, $d_a, $y_b, $m_b, $d_b);

   #Parse out the two possibilities of year-month-day or month-day-year
   if ($a =~ m!^($year)[-/]($month)[-/]($day)$!) { 
      ($y_a, $m_a, $d_a) = ($1, $2, $3);
   } elsif ($a =~ m!^($month)[-/]($day)[-/]($year)$!) { 
      ($m_a, $d_a, $y_a) = ($1, $2, $3);
   } else {
      croak "Internal error.  Date passed to sort is not a date: '$a'\n";
   }
   if ($b =~ m!^($year)[-/]($month)[-/]($day)$!) { 
      ($y_b, $m_b, $d_b) = ($1, $2, $3);
   } elsif ($b =~ m!^($month)[-/]($day)[-/]($year)$!) { 
      ($m_b, $d_b, $y_b) = ($1, $2, $3);
   } else {
      croak "Internal error.  Date passed to sort is not a date: '$b'\n";
   }

   #compare year, fall back to month, fall back to day
   return(($y_a <=> $y_b) || ($m_a <=> $m_b) || ($d_a <=> $d_b));
}
