#!/usr/bin/perl -w
#----------------------------------------------------------------------------
# $Id: nometa,v 1.26 1999/05/02 06:27:12 yschoe Exp $
#
# nometa is a Web proxy that relays HTTP requests and gets rid of the
# damn M$ <META ...> tags in the received document before handing it over
# to the web browser. All other content types such as gif/jpg images will
# be transferred transparently. Both GET and POST methods are supported,
# so all types of html forms can be used.
#
# usage: nometa [-d|--debug|<port>]
#
# -d | --debug : print debugging messages and actual HTTP requests
# <port>       : specify the port where this proxy server
#                is going to be running. If no argument is
#                given, $proxy_port (7890) is going to be used
#                by default.
# examples>
#
# nometa : run on port $proxy_port (7890). message off.
# nometa -d : run on port $proxy_port (7890). message on.
# nometa --debug : run on port $proxy_port (7890). message on.
# nometa 5555 : run on port 5555. message off.
#
# 1. Run nometa as described above.
#
# 2. Set the proxy server in your web browser to:
#
# host: the hostname of the host you are running this program
# port: the value of $proxy_port below, or given above
#
# 3. Use your browser as usual. All traffic will be relayed and
# filtered by nometa proxy.
#
# 4. If you want to stop using nometa, just reset your browser's
# proxy setting to the default and kill the nometa server by
# pressing CTRL-C.
#
# *. NEVER run this script as root. This script will work just as well
# even when it is run as a regular user process.
#
# Customizing: Go straight to the " 0. Configure this " section below.
#
# Requirement: perl, version 5.004_04 or higher
#
# Bugs: Turning on the trailer can cause some problems with javascript, etc.
# The default is OFF.
#
# The debug messages may not print out synchronized because
# the fork()ed processes are not coordinated in any way.
#
# Perl chokes out an error when the hostname of the URL is
# not resolvable -- however, it doesn't cause any problem
# in the following requests.
#
# Tips: This script could be easily altered to selectively send
# stuff from what your browser usually sends to the web server,
# such as Referer:, User-Agent:, etc. Run nometa with the --debug
# flag to see what your browser is sending out. You could disable
# cookies too. See program section 2.2.8.
#
# Why?: I wrote this script because M$ Frontpage uses these ridiculous
# <META ...> tags and they prevent browsers
# like netscape from displaying Hangul properly.
#
# Copyright (C) 1999 Yoonsuck Choe
# http://www.cs.utexas.edu/users/yschoe
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org
#
#----------------------------------------------------------------------------
use strict;
use IO::Socket;
#----------------------------------------------------------------------------
# 0. Configure this
#
# $allowed_host : only request from this host is allowed
# $proxy_port : port where this proxy server would be listening
# -- this could be overridden by command line arg.
# $trailer_flag : if turned on, it lets you know that the html document
# is filtered by nometa. Doesn't work properly, so
# it is advised that you leave the value set to 0.
#----------------------------------------------------------------------------
# IP address (dotted quad) that a connecting client must match to be served.
my $allowed_host = '172.16.1.3';
# Port the proxy listens on when the command line does not name one.
my $proxy_port   = 7890;
# Trailer switch: 0 = never append the trailer, 1 = append it to responses.
my $trailer_flag = 0;
#----------------------------------------------------------------------------
# End of configurable area
#----------------------------------------------------------------------------
#----------------------------------------------------------------------------
# _parse_cli($default_port, @args) : interpret the command line
#
#   $default_port : port to use when @args does not name one
#   @args         : the raw command-line arguments (normally @ARGV)
#
# Returns the list ($port, $nodebug) where $nodebug is 1 when debugging
# messages are OFF and 0 when they are ON.
#
# Exactly one argument is honoured, as in the usage text:
#   -d | --debug   turn debugging messages on
#   1 .. 65535     listen on that port (debugging messages stay off)
# Anything else -- no argument, several arguments, or an unrecognised
# argument -- leaves the default port in place with messages off.
#----------------------------------------------------------------------------
sub _parse_cli {
    my ($default_port, @args) = @_;

    # Start from the quiet defaults so that EVERY path yields a defined
    # flag; in particular, naming a port must not switch debugging on.
    my $port  = $default_port;
    my $quiet = 1;

    if (@args == 1) {
        my $arg = $args[0];
        if ($arg eq "-d" || $arg eq "--debug") {
            $quiet = 0;
        }
        # Digits only: a plain numeric comparison would numify arbitrary
        # strings (with a warning under -w) and accept things like "80abc".
        elsif ($arg =~ /^\d+$/ && $arg > 0 && $arg <= 65535) {
            $port = $arg;
        }
    }
    return ($port, $quiet);
}

my ($nodebug);
($proxy_port, $nodebug) = _parse_cli($proxy_port, @ARGV);
#----------------------------------------------------------------------------
#
# 1. Start up the proxy server
#
#----------------------------------------------------------------------------
# Ask the system for this machine's name; it is used only in the banner.
my $hostname = `hostname`;
$hostname = '' unless defined $hostname;    # command missing or failed
chomp $hostname;                            # drop the newline only, never a real character

# Tell the operator how to point a browser at the proxy.
print "[NoMETA!]\n";
print "Set your browser\'s http proxy to :\n";
print "\t host\t= $hostname\n";
print "\t port\t= $proxy_port\n";
print "Access is granted to host $allowed_host\n";
print "Press CTRL-C to terminate the program.\n";

# Create the listening TCP socket on $proxy_port.
# NOTE(review): the socket is bound to 'localhost', while the banner
# advertises $hostname and $allowed_host may be a non-loopback address --
# confirm that this combination is what is intended for your setup.
my $sock = IO::Socket::INET->new(
    LocalHost => 'localhost',
    LocalPort => $proxy_port,
    Proto     => 'tcp',
    Listen    => SOMAXCONN,
    Reuse     => 1);
$sock or die "no socket :$!";
#----------------------------------------------------------------------------
#
# 2. Huge loop: Get connection request and process it
#
#----------------------------------------------------------------------------
use POSIX qw(WNOHANG EINTR);    # non-blocking reaping and accept() retry

my ($new_sock, $c_addr, $pid);

# Reap EVERY child that has finished, so none is left as a zombie even
# when several exit before the handler gets to run. $! and $? are
# localized because the handler may run right after an interrupted
# accept(), whose error code the main loop still has to inspect.
$SIG{CHLD} = sub {
    local ($!, $?);
    my $reaped;
    while (($reaped = waitpid(-1, WNOHANG)) > 0) {
        $nodebug || print ">> EXIT (process $reaped)\n";
    }
};

while (1) {
    ($new_sock, $c_addr) = $sock->accept();

    # accept() reports failure either as an empty list or as a socket
    # object without a peer address, so both values are checked.
    unless ($new_sock && defined $c_addr) {
        # A child exiting while we are blocked in accept() interrupts the
        # call; that is not an error, just try again.
        next if $! == EINTR;
        warn "accept failed: $!\n";
        last;
    }

    $pid = fork();
    unless (defined $pid) {
        # Without this check the parent would fall into the child branch
        # (undef == 0) and terminate the whole server.
        warn "fork failed: $!\n";
        close $new_sock;
        next;
    }

    #------------------------------------------------------------
    # 2.1 CHILD: Send request and receive and process document
    #------------------------------------------------------------
    if ($pid == 0) {
        close $sock;            # the child never accepts connections
        _serve_client($new_sock, $c_addr);
        exit(0);
    }

    #------------------------------------------------------------
    # 2.2 else, PARENT : clean up
    #------------------------------------------------------------
    close $new_sock;
}
#----------------------------------------------------------------------------
#
# 3. Close the proxy server socket
#
#----------------------------------------------------------------------------
close $sock;

#----------------------------------------------------------------------------
# Subroutine: _strip_meta_tags($text) : return $text without META tags
#
# Matching is case-insensitive. The negated class [^>]* makes each match
# stop at the tag's own closing bracket instead of running on to the last
# ">" found in $text.
# BIG ASSUMPTION (unchanged): a single META tag fits in a single $text.
#----------------------------------------------------------------------------
sub _strip_meta_tags {
    my ($text) = @_;
    $text =~ s/<meta\b[^>]*>//gi;
    return $text;
}

#----------------------------------------------------------------------------
# Subroutine: _serve_client($client, $peer_addr) : handle ONE proxy request
#
#   $client    : connected socket to the browser
#   $peer_addr : packed peer address as returned by accept()
#
# Runs in the forked child. Returns after the request has been relayed or
# rejected (the caller then exits); dies when the web server cannot be
# reached. $client is always closed before returning or dying.
#----------------------------------------------------------------------------
sub _serve_client {
    my ($client, $peer_addr) = @_;

    # 2.1.1 Check the client address and port
    my ($client_port, $c_ip) = sockaddr_in($peer_addr);
    my $client_ipnum = inet_ntoa($c_ip);
    $nodebug || print "\n[connection from: $client_ipnum ]";

    # 2.1.2 Check if request is from a legal ip address: for security reasons
    if ($client_ipnum ne $allowed_host) {
        print $client " \n";
        print $client " NoMETA!\n\n";
        print $client "You are not allowed to access this service.\n\n";
        print $client "Set the \$allowed_host variable and try again.";
        print $client "\n\n";
        trailer($client);
        close $client;
        return;
    }

    # 2.1.3 Fetch the first line of the HTTP request
    my $request = <$client>;
    unless (defined $request) {     # client hung up without sending anything
        close $client;
        return;
    }
    $nodebug || print "------------\n";
    $request =~ s/\r?\n\z//;        # strip the line ending, CRLF or LF
    my ($cmd, $url, $prot) = split(/\s+/, $request);
    unless (defined $cmd && defined $url) {
        $nodebug || print ">> malformed request line\n";
        close $client;
        return;
    }
    $prot = '' unless defined $prot;

    # 2.1.4 parse URL : get protocol, host and path
    my ($tprot, $wwwhost, $path) = split(/\/+/, $url, 3);
    $path = '' unless defined $path;        # e.g. "http://host"

    # 2.1.5 Check protocol and assign appropriate port
    #       Actually, if you only set the http proxy in your browser,
    #       it'll be fine -- no need to worry about the ftp request.
    my $targetport;
    my $scheme = defined $tprot ? lc($tprot) : '';
    if ($scheme eq "http:") {
        $targetport = 80;
    } elsif ($scheme eq "ftp:") {
        # 21 is the FTP control port. Note that the request is still
        # relayed as HTTP text, exactly as for any other target.
        $targetport = 21;
    }

    # 2.1.6 check if port is explicitly specified like http://host:8000/path/
    if (defined $wwwhost && $wwwhost =~ /:/) {
        my ($host_only, $explicit_port) = split(/:/, $wwwhost, 2);
        $wwwhost = $host_only;
        # An empty or non-numeric port ("host:") keeps the scheme default.
        if (defined $explicit_port && $explicit_port =~ /^\d+$/) {
            $targetport = $explicit_port;
        }
    }

    # Nothing sensible to connect to (unknown scheme, missing host, ...).
    unless (defined $wwwhost && length $wwwhost && defined $targetport) {
        $nodebug || print ">> cannot relay request for $url\n";
        close $client;
        return;
    }

    # 2.1.7 Connect to the web server and send the request line
    my $wwwsock = IO::Socket::INET->new(
        PeerAddr => $wwwhost,
        PeerPort => $targetport,
        Proto    => 'tcp');
    if (!$wwwsock) {
        close $client;
        die "no socket :$!";
    }
    # HTTP lines are terminated by CRLF.
    print $wwwsock "$cmd /$path $prot\r\n";
    $nodebug || print "sending> $cmd /$path $prot\n";

    # 2.1.8 Now that the connection is made, send all the rest of the
    #       request
    #       - specific items could be filtered out here
    #         by selectively sending request items.
    my $content_length;
    while (defined (my $line = <$client>)) {
        print $wwwsock $line;
        $nodebug || print "sending> $line";
        if ($line =~ /^Content-Length:\s*(\d+)/i) {
            $content_length = $1;
        }

        # Only an empty line ends the header section.
        next unless $line =~ /^\r?\n\z/;

        # A POST carries a body after the headers -- read it, and
        # send it to the web server.
        if ($cmd eq "POST") {
            $nodebug || print ">> POST method\n";
            my $body;
            if (defined $content_length) {
                # The body need not end in a newline, so ask for the
                # announced number of bytes instead of for "a line".
                $body = '';
                read($client, $body, $content_length) if $content_length > 0;
            } else {
                # No length announced: fall back to reading one line.
                $body = <$client>;
            }
            if (defined $body && length $body) {
                print $wwwsock $body;
                $nodebug || print "sending> $body";
            }
        }
        $nodebug || print ">> END of REQUEST\n";
        last;
    }

    # 2.1.9 Finally, get the result from the web server
    #       and get rid of the META tag and send it back to the client
    #       - This may slow down the transfer because it looks for the
    #         tag all the time.
    while (defined (my $wwwbuf = <$wwwsock>)) {
        print {$client} _strip_meta_tags($wwwbuf);
        # uncomment this to see what you're receiving
        # print $wwwbuf;
    }

    # 2.1.10 Close the www socket
    close $wwwsock;

    # 2.1.11 Print a small trailer to tell the user that this page
    #        was fetched through the nometa proxy.
    #        Blindly slapping the trailer on to any form of data seems to
    #        cause a problem, for example, when slapped at the end of a
    #        javascript file, it'll cause an error when interpreting it.
    if ($trailer_flag) {
        trailer($client);
    }

    # 2.1.12 Close the proxy request server socket
    close $client;
    return;
}
#----------------------------------------------------------------------------
# Subroutine: trailer($socket) : send trailer through $socket
#----------------------------------------------------------------------------
sub trailer {
    # Write the three-part nometa signature (name, author, RCS id) to the
    # handle passed as the only argument. The value of the final print is
    # what the sub returns.
    my $client = shift;

    print {$client} "\nNoMETA! ";
    print {$client} "by yschoe\@cs.utexas.edu : ";
    return print {$client} '$Id: nometa,v 1.26 1999/05/02 06:27:12 yschoe Exp $';
}