#!/usr/bin/perl -w
#----------------------------------------------------------------------------
# $Id: nometa,v 1.26 1999/05/02 06:27:12 yschoe Exp $
#
# nometa is a Web proxy that relays HTTP requests and gets rid of the
# damn M$ <meta> tags in the received document before handing it over to
# the web browser. Both GET and POST methods are supported, so all types
# of html forms can be used.
#
# usage: nometa [-d|--debug|<port>]
#
#    -d | --debug : print debugging messages and actual HTTP requests
#    <port>       : specify the port where this proxy server
#                   is going to be running. If no argument is
#                   given, $proxy_port (7890) is going to be used
#                   by default.
# examples>
#
#    nometa         : run on port $proxy_port (7890). message off.
#    nometa -d      : run on port $proxy_port (7890). message on.
#    nometa --debug : run on port $proxy_port (7890). message on.
#    nometa 5555    : run on port 5555. message off.
#
# 1. Run nometa as described above.
#
# 2. Set the proxy server in your web browser to:
#
#    host: the hostname of the host you are running this program
#    port: the value of $proxy_port below, or given above
#
# 3. Use your browser as usual. All traffic will be relayed and
#    filtered by nometa proxy.
#
# 4. If you want to stop using nometa, just reset your browser's
#    proxy setting to the default and kill the nometa server by
#    pressing CTRL-C.
#
# *. NEVER run this script as root. This script will work just as well
#    even when it is run as a regular user process.
#
# Customizing: Go straight to the " 0. Configure this " section below.
#
# Requirement: perl, version 5.004_04 or higher
#
# Bugs: Turning on the trailer can cause some problems with javascript, etc.
#       The default is OFF.
#
#       The debug messages may not print out synchronized because
#       the fork()ed processes are not coordinated in any way.
#
#       Perl chokes out an error when the hostname of the URL is
#       not resolvable -- however, it doesn't cause any problem
#       in the following requests.
#
#       The META filter is applied to every line relayed from the web
#       server, whatever the content type of the response is.
#
# Tips: This script could be easily altered to selectively send
#       stuff from what your browser usually sends to the web server,
#       such as Referer:, User-Agent:, etc. Run nometa with the --debug
#       flag to see what your browser is sending out. You could disable
#       cookies too. See program section 2.2.8.
#
# Why?: I wrote this script because M$ Frontpage uses these ridiculous
#       <meta> tags and it prevents browsers like netscape from
#       displaying Hangul properly.
#
# Copyright (C) 1999 Yoonsuck Choe
# http://www.cs.utexas.edu/users/yschoe
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org
#
#----------------------------------------------------------------------------

use strict;
use IO::Socket;
use POSIX qw(:sys_wait_h :errno_h);    # WNOHANG for the reaper, EINTR for accept

#----------------------------------------------------------------------------
# 0. Configure this
#
# $allowed_host : only request from this host is allowed
# $proxy_port   : port where this proxy server would be listening
#                 -- this could be overridden by command line arg.
# $trailer_flag : if turned on, it lets you know that the html document
#                 is filtered by nometa. Doesn't work properly, so
#                 it is advised that you leave the value set to 0.
#----------------------------------------------------------------------------
my $allowed_host = "172.16.1.3";    # host allowed to use the service
my $proxy_port   = 7890;            # default proxy port
my $trailer_flag = 0;               # 0: turn off trailer, 1: turn on trailer
#----------------------------------------------------------------------------
# End of configurable area
#----------------------------------------------------------------------------

# Debug output is off unless -d/--debug is the single argument. Starting
# from 1 keeps the messages off when the single argument is a port number.
my $nodebug = 1;
if (@ARGV == 1) {
    if ($ARGV[0] eq "-d" || $ARGV[0] eq "--debug") {
        $nodebug = 0;
    }
    elsif ($ARGV[0] =~ /^\d+$/ && $ARGV[0] > 0 && $ARGV[0] <= 65535) {
        $proxy_port = $ARGV[0];
    }
}

# Flush STDOUT after every print so that text still sitting in the buffer
# is not inherited, and printed a second time, by fork()ed children.
$| = 1;

#----------------------------------------------------------------------------
#
# 1. Start up the proxy server
#
#----------------------------------------------------------------------------
my $hostname = `hostname`;
chomp $hostname;

print "[NoMETA!]\n";
print "Set your browser\'s http proxy to :\n";
print "\t host\t= $hostname\n";
print "\t port\t= $proxy_port\n";
print "Access is granted to host $allowed_host\n";
print "Press CTRL-C to terminate the program.\n";

# NOTE(review): the listener is bound to 'localhost' while the banner above
# advertises $hostname and access is granted to $allowed_host only --
# confirm that this bind address is the intended one.
my $sock = IO::Socket::INET->new(
    LocalHost => 'localhost',
    LocalPort => $proxy_port,
    Proto     => 'tcp',
    Listen    => SOMAXCONN,
    Reuse     => 1,
);
$sock or die "no socket :$!";

#----------------------------------------------------------------------------
#
# 2. Huge loop: Get connection request and process it
#
#----------------------------------------------------------------------------
$SIG{CHLD} = \&reap_children;    # prevent zombies

while (1) {
    my ($new_sock, $c_addr) = $sock->accept();

    # accept() comes back without a peer address when it fails, e.g. when
    # it is interrupted by the SIGCHLD of an exiting child. Retry in that
    # case instead of forking a child that has no client to talk to.
    unless ($new_sock && defined $c_addr) {
        next if $! == EINTR;
        warn "nometa: accept failed: $!\n";
        last;
    }

    my $pid = fork();
    unless (defined $pid) {
        # fork() failed: drop this connection but keep the server running.
        warn "nometa: fork failed: $!\n";
        close $new_sock;
        next;
    }

    #------------------------------------------------------------
    # 2.1 CHILD: Send request and receive and process document
    #------------------------------------------------------------
    if ($pid == 0) {
        close $sock;    # the listening socket belongs to the parent
        handle_client($new_sock, $c_addr);
        exit(0);
    }

    #------------------------------------------------------------
    # 2.2 else, PARENT : clean up
    #------------------------------------------------------------
    close $new_sock;
}

#----------------------------------------------------------------------------
#
# 3. Close the proxy server socket
#
#----------------------------------------------------------------------------
close $sock;

#----------------------------------------------------------------------------
# Subroutine: reap_children() : SIGCHLD handler. Collects every child that
#             has exited so far; one signal may stand for several children.
#----------------------------------------------------------------------------
sub reap_children {
    local ($!, $?);    # do not disturb the errno seen by the interrupted code
    my $reaped;
    while (($reaped = waitpid(-1, WNOHANG)) > 0) {
        print ">> EXIT (process $reaped)\n" unless $nodebug;
    }
}

#----------------------------------------------------------------------------
# Subroutine: handle_client($client, $c_addr) : serve one proxy request.
#             $client is the accepted browser socket, $c_addr its packed
#             peer address as returned by accept(). Runs in the child
#             process; dies if the web server cannot be contacted.
#----------------------------------------------------------------------------
sub handle_client {
    my ($client, $c_addr) = @_;

    # 2.1.1 Check the client address
    my $c_ip         = (sockaddr_in($c_addr))[1];
    my $client_ipnum = inet_ntoa($c_ip);
    print "\n[connection from: $client_ipnum ]" unless $nodebug;

    # 2.2.2 Check if request is from a legal ip address: for security reasons
    if ($client_ipnum ne $allowed_host) {
        print $client " \n";
        print $client "\n\nNoMETA!\n\n\n";
        print $client "You are not allowed to access this service.\n\n";
        print $client "Set the \$allowed_host variable and try again.";
        print $client "\n\n";
        trailer($client);
        close $client;
        return;
    }

    # 2.2.3 Fetch the first line of the HTTP request
    my $request_line = <$client>;
    unless (defined $request_line) {
        # The client went away without sending anything.
        close $client;
        return;
    }
    print "------------\n" unless $nodebug;
    $request_line =~ s/\r?\n$//;
    my ($cmd, $url, $prot) = split(/\s+/, $request_line);
    unless (defined $cmd && defined $url) {
        warn "nometa: malformed request line '$request_line'\n";
        close $client;
        return;
    }
    $prot = "HTTP/1.0" unless defined $prot;

    # 2.2.4 - 2.2.6 Work out the target host, port and path
    my ($wwwhost, $targetport, $path) = parse_proxy_url($url);
    unless (defined $wwwhost && defined $targetport) {
        warn "nometa: cannot work out where to send '$url'\n";
        close $client;
        return;
    }

    # 2.2.7 Connect to the web server and send the request line
    my $wwwsock = IO::Socket::INET->new(
        PeerAddr => $wwwhost,
        PeerPort => $targetport,
        Proto    => 'tcp',
    );
    if (!$wwwsock) {
        close $client;
        die "no socket :$!";
    }
    # The request must reach the server before its answer is read below.
    $wwwsock->autoflush(1);

    print $wwwsock "$cmd /$path $prot\015\012";
    print "sending> $cmd /$path $prot\n" unless $nodebug;

    # 2.2.8 Now that the connection is made, send all the rest of the
    #       request
    #       - specific items could be filtered out here
    #         by selectively sending request items.
    my $content_length = 0;
    while (defined(my $line = <$client>)) {
        print $wwwsock $line;
        print "sending> $line" unless $nodebug;

        if ($line =~ /^Content-Length:\s*(\d+)/i) {
            $content_length = $1;
        }

        # A line that does not start like a header field ends the request
        # header.
        if ($line !~ /^[A-Za-z0-9]/) {
            if ($cmd eq "POST") {
                print ">> POST method\n" unless $nodebug;
                relay_post_body($client, $wwwsock, $content_length);
            }
            print ">> END of REQUEST\n" unless $nodebug;
            last;
        }
    }

    # 2.2.9 Finally, get the result from the web server
    #       and get rid of the META tag and send it back to the client.
    #       The loop runs until the web server closes its end.
    while (defined(my $wwwbuf = <$wwwsock>)) {
        print $client strip_meta_tags($wwwbuf);
        # uncomment this to see what you're receiving
        # print $wwwbuf;
    }

    # 2.2.10 Close the www socket
    close $wwwsock;

    # 2.2.11 Print a small trailer to tell the user that this page
    #        was fetched through the nometa proxy.
    #        Blindly slapping the trailer on to any form of data seems to
    #        cause a problem, for example, when slapped at the end of a
    #        javascript file, it'll cause an error when interpreting it.
    if ($trailer_flag) {
        trailer($client);
    }

    # 2.2.12 Close the proxy request server socket
    close $client;
    return;
}

#----------------------------------------------------------------------------
# Subroutine: parse_proxy_url($url) : split an absolute URL as sent to a
#             proxy into (host, port, path). The path is returned without
#             its leading slash; host or port is undef when it cannot be
#             determined from the URL.
#----------------------------------------------------------------------------
sub parse_proxy_url {
    my ($url) = @_;

    # "http://host:port/some/path" -> "http:", "host:port", "some/path"
    my ($tprot, $wwwhost, $path) = split(/\/+/, $url, 3);
    $path = '' unless defined $path;

    # Default port by protocol. If you only set the http proxy in your
    # browser, only the http case is ever used.
    my $targetport;
    if (defined $tprot) {
        if    ($tprot eq "http:") { $targetport = 80; }
        elsif ($tprot eq "ftp:")  { $targetport = 21; }
    }

    # A port given explicitly, like http://host:8000/path/, wins.
    if (defined $wwwhost && $wwwhost =~ /:/) {
        my ($host_part, $port_part) = split(/:/, $wwwhost);
        $wwwhost    = $host_part;
        $targetport = $port_part
            if defined $port_part && $port_part ne '';
    }

    $wwwhost = undef if defined $wwwhost && $wwwhost eq '';
    return ($wwwhost, $targetport, $path);
}

#----------------------------------------------------------------------------
# Subroutine: relay_post_body($client, $wwwsock, $content_length) : copy
#             the body of a POST request from the browser to the web
#             server. Exactly $content_length bytes are copied when the
#             browser announced a length; otherwise one line is copied.
#----------------------------------------------------------------------------
sub relay_post_body {
    my ($client, $wwwsock, $content_length) = @_;

    if ($content_length > 0) {
        # Form data usually has no trailing newline, so count bytes rather
        # than wait for a line that may never be completed.
        my $remaining = $content_length;
        while ($remaining > 0) {
            my $want = $remaining > 8192 ? 8192 : $remaining;
            my $chunk;
            my $got = read($client, $chunk, $want);
            last unless $got;    # read error or client closed early
            print $wwwsock $chunk;
            print "sending> $chunk\n" unless $nodebug;
            $remaining -= $got;
        }
        return;
    }

    # No Content-Length was seen: read the single extra line that is
    # appended as input to the CGI.
    my $line = <$client>;
    if (defined $line) {
        print $wwwsock $line;
        print "sending> $line" unless $nodebug;
    }
    return;
}

#----------------------------------------------------------------------------
# Subroutine: strip_meta_tags($text) : return $text with every META tag
#             removed.
#             - BIG ASSUMPTION: a single META tag fits in a single buffer;
#               a tag that is split over several lines is left alone.
#----------------------------------------------------------------------------
sub strip_meta_tags {
    my ($text) = @_;
    # [^>]* stops at the end of the tag itself, so whatever follows the
    # tag on the same line is kept.
    $text =~ s/<meta\b[^>]*>//gi;
    return $text;
}

#----------------------------------------------------------------------------
# Subroutine: trailer($socket) : send trailer through $socket
#----------------------------------------------------------------------------
sub trailer {
    my ($new_sock) = @_;
    print $new_sock "\nNoMETA! ";
    print $new_sock "by yschoe\@cs.utexas.edu : ";
    print $new_sock '$Id: nometa,v 1.26 1999/05/02 06:27:12 yschoe Exp $';
}