#!/lusr/bin/perl
use File::Find;
use File::Basename;

# Command-line arguments:
#   $ARGV[0]  directory in which the search starts (required)
#   $ARGV[1]  server name; inline_lister compares it with the host part of
#             absolute src URLs (optional)
#
# Both values are deliberately left as package globals (no "my"), because
# inline_lister reads $servername as a package variable.
@ARGV >= 1 or die "Usage: $0 directory [servername]\n";

$indirectory = $ARGV[0];
$servername  = $ARGV[1];

# Walk the tree below $indirectory.
# The first argument to find() is the subroutine that is called for each
# entry found; the second is the directory in which to start the search.
find(\&inline_lister, $indirectory);

# inline_lister - File::Find "wanted" callback.
#
# For every file whose name ends in .html, .htm or .shtml, prints one line to
# STDOUT of the form
#
#   <file> <dep> <dep> ... \n
#
# where each <dep> is derived from a double-quoted src="..." attribute found
# in the file (see _inline_target for the translation rules).  Every field,
# including the last one, is followed by a single space.
#
# Inputs (all supplied by the caller's environment, none via @_):
#   $_                 name of the file to open
#   $File::Find::name  path of the file as reported by File::Find
#   $servername        package global holding the host name of "this" server
#
# Dies if the file cannot be opened.
sub inline_lister {
  use strict;
  use warnings;
  our $servername;    # package global set by the main script

  # Only HTML documents are scanned; everything else is skipped.
  return unless /\.(?:html|htm|shtml)$/;

  my $file = $File::Find::name;

  # $_ is opened rather than $file: when invoked through find() without the
  # no_chdir option, the current directory is the one containing the file
  # and $_ is its bare name.  Three-argument open keeps a file name that
  # starts with '<', '>' or '|' from being interpreted as an open mode.
  open(my $in, '<', $_) or die "File $file couldn't be opened: $!";

  # Slurp the file as-is.  (Joining the already newline-terminated lines
  # with "\n" would double every line break.)
  my $text = do { local $/; <$in> };
  close($in);
  $text = '' unless defined $text;

  # Value of every double-quoted src attribute, in document order.
  my @srcs = ($text =~ m|src\s*=\s*\"([^\"]+)\"|ig);

  # The first character is dropped from both paths.
  # NOTE(review): this presumably expects paths that begin with "." (search
  # started from "." or "./sub"), turning "./a/b.html" into "/a/b.html" --
  # confirm against how the script is invoked.
  my $dirname = substr(dirname($file), 1);
  my $line    = substr($file, 1) . ' ';

  foreach my $src (@srcs) {
    my $target = _inline_target($src, $dirname, $servername);
    $line .= "$target " if defined $target;
  }

  print "$line\n";
}

# _inline_target - translate one src value into a path on this server.
#
# Arguments:
#   $src      the raw attribute value
#   $dirname  directory of the referencing document, used as the prefix for
#             relative references
#   $server   host name of this server (may be undef)
#
# Returns the path to list, or undef when the reference must be ignored:
#   * http://, https:// or //host URL on $server    -> its path ("/" if none)
#   * http://, https:// or //host URL on another host -> undef
#   * any other scheme (data:, mailto:, ...)         -> undef
#   * reference starting with "/"                     -> returned unchanged
#   * anything else                                   -> "$dirname/$src"
sub _inline_target {
  use strict;
  use warnings;
  my ($src, $dirname, $server) = @_;

  # Absolute or scheme-relative URL: split into host part and path.
  if ($src =~ m{\A(?:https?:)?//([^/]*)(/.*)?\z}is) {
    my ($host, $path) = ($1, $2);

    # Host names are case-insensitive.
    return unless defined $server && lc($host) eq lc($server);

    # A URL without any path ("http://host") refers to the server root.
    return defined $path ? $path : '/';
  }

  # Any other URL scheme does not name a file below the document tree.
  return if $src =~ m{\A[a-z][a-z0-9+.\-]*:}i;

  # Already rooted at the top of the site: no directory prefix wanted.
  return $src if $src =~ m{\A/};

  # Plain relative reference: qualify it with the document's directory.
  return "$dirname/$src";
}

