#!/usr/bin/perl

# Domains that must never end up in the generated reject list, even when
# they are linked from a spam body.
#
# Each entry is compiled into a case-insensitive pattern that matches the
# domain itself or any host below it.  qr// with \Q...\E keeps the dots
# literal (a double-quoted "\." silently loses its backslash and then matches
# any character), and the (?:\A|\.) prefix stops look-alike registrations
# such as "notmicrosoft.com" from borrowing an exemption.
@NEVERBAN = map { qr/(?:\A|\.)\Q$_\E\z/i } qw(
    iwon.com
    compusa.com
    macromedia.com
    fcps.edu
    microsoft.com
    medianext.com
    netscape.com
    akamai.net
    chevrolet.com
    yahoo.com
    amazon.com
    geocities.com
    topica.com
    verisign.com
    expedia.com
    travelocity.com
    ebay.com
    hotbot.com
    msn.com
    tripod.com
    w3.org
    brinkster.com
);

# Undo quoted-printable encoding and flatten the text onto a single line.
#
# Takes the raw message text and returns the decoded copy.  The order of the
# steps matters: soft line breaks have to go before the =HH escapes are
# decoded, otherwise an encoded "=" (=3D) that happens to end a line would
# itself be mistaken for a soft break and vanish together with the newline.
sub decode_body {
  my ($text) = @_;
  return "" unless defined $text;

  # Soft line breaks: "=" at end of line (LF or CRLF) joins the next line.
  # Leading blanks on the continuation line are dropped as well.
  $text =~ s/=\r?\n[ \t]*//g;

  # =HH, where HH is a pair of hex digits, becomes the byte it stands for.
  $text =~ s/=([0-9a-fA-F]{2})/pack("H2", $1)/ge;

  # Every remaining line end becomes one blank; the CR of a CRLF pair is
  # taken along so it cannot end up glued to a host name later on.
  $text =~ s/\r?\n/ /g;

  return $text;
}

# Slurp everything from the files named on the command line (or STDIN) and
# keep the decoded, single-line form in $body for the URL scan.
$body = decode_body(join("", <>));

# Reduce the text that follows "http://" (everything up to the next "/") to
# the host name that should be banned.
#
# Returns the lower-cased host, or nothing when what is left does not contain
# a '.' and therefore cannot be a usable host name.  Query, user info and
# port are stripped BEFORE the two-label reduction, so that
# "www.spam.com:8080" ends up as "spam.com" just like "www.spam.com" does.
sub host_to_ban {
  my ($host) = @_;
  return unless defined $host;

  # Host names are case-insensitive; fold so duplicates collapse.
  $host = lc $host;
  # The URL ends at the first blank, quote or angle bracket.
  $host =~ s/[\s"'<>].*//s;
  # remove '?' and the query data behind it
  $host =~ s/\?.*//s;
  # remove user info in front of an '@' sign
  $host =~ s/^.*\@(....*\..*)/$1/;
  # remove port numbers
  $host =~ s/:[0-9]*$//;
  # For these top-level domains only the last two labels are kept, so one
  # rule covers every host below the registered domain.
  if ($host =~ /\.(?:net|com|bz|biz)$/) {
    $host =~ s/^.*\.([^.]*\.[^.]*)$/$1/;
  }

  # only report it if it still has a '.' in it somewhere
  return unless $host =~ /\./;
  return $host;
}

# Find every http:// or https:// URL in the given text and return the list
# of distinct host names to ban (in no particular order).
#
# The scan resumes right behind each "//", so a URL embedded in another
# URL's query string is found too.  A URL at the very start of the text and
# a URL that runs to the end of the text without a "/" are both handled.
sub find_spam_hosts {
  my ($text) = @_;
  return unless defined $text;

  my %seen;
  while ($text =~ m{https?://}gi) {
    my $start = pos($text);
    my $slash = index($text, "/", $start);
    # No further slash: the candidate runs to the end of the text.
    my $authority = $slash < 0
      ? substr($text, $start)
      : substr($text, $start, $slash - $start);
    my $host = host_to_ban($authority);
    $seen{$host} = 1 if defined $host;
  }
  return keys %seen;
}

# Record every host linked from the message body.
$BADGUYS{$_} = 1 foreach find_spam_hosts($body);

# True when the host matches one of the @NEVERBAN patterns and must therefore
# be left out of the reject list.  Returns on the first hit; the original
# "break" was not a Perl loop exit and never stopped the scan.
sub is_never_banned {
  my ($host) = @_;
  foreach my $pattern (@NEVERBAN) {
    return 1 if $host =~ /$pattern/;
  }
  return 0;
}

# Build one output line of the form "/regexp/<TAB><TAB>REJECT text" for the
# given host.  Every regexp metacharacter in the host (not just '.') is
# backslash-escaped, so stray characters picked up from the message body
# cannot produce a malformed pattern.
sub reject_rule {
  my ($host) = @_;
  (my $pattern = $host) =~ s/([\\.^\$*+?()\[\]{}|\/])/\\$1/g;
  return sprintf("/%s/\t\tREJECT shut your spamhole: %s\n", $pattern, $host);
}

# Emit the rules in sorted order, skipping the exempt domains.
foreach my $host (sort(keys(%BADGUYS))) {
  next if is_never_banned($host);
  print reject_rule($host);
}
