#!/lusr/bin/perl
#----------------------------------------------------------------------------
# Program: "Web to Newsgroup gateway : web2news"
#
# Version: 0.2
#
# Date: Sat May  8 15:47:00 CDT 1999 fixed security holes : Dont use v0.1!!!
# Date: Thu May  7 00:08:21 CDT 1998 added fullheader arg in query. fixed
#                                    '<' and '>' problem (use <xmp>).
# Date: Tue May  5 13:25:24 CDT 1998 fixed fullheader option
# Date: Mon May  4 23:08:04 CDT 1998 initial version
#
# Description:
#
#       A simple perl script that makes use of 'lynx' to display
#       newsgroup content on a web page.
#
# Author: Yoonsuck Choe <yschoe@cs.utexas.edu> Copyright (c) 1998
#
# Licensing Terms : Freely distributable for private use only (with this
#                   header section intact).
#                   Permission of author required for commercial use.
#                   Provided as-is. No warranty.
#
# Usage: how to invoke the script in an HTML document
#
#       http://host/path/web2newslink?web2newslink=news://news.host/group.name
#                                                                  \~~~~~~~~~
#                                                                   optional
#       You can use wildcards in the group name -- for example,
#
#               comp.os.linux.*
#
#       Beware -- if you omit the newsgroup name, and if your news server
#                 archives thousands of newsgroups, it'll take forever to
#                 load the newsgroup list.
#
# Source URL:
#
#       http://www.cs.utexas.edu/users/yschoe/src/web2news.pl
#
# Requirements:
#
#       Lynx Version 2.7.2 (1997) or better
#       Perl Version 5.003 or better
#       * may work with older versions, but I cannot tell for sure.
#
# TODO:
#
#       - detecting error messages returned from lynx
#       - restricting access (from outside a certain domain)
#
# References:   RFC 1036: Standard for USENET Messages.
#               RFC 1738: Uniform Resource Locators (URL)
#
#----------------------------------------------------------------------------
#
# Maintenance note: this revision runs lynx through a list-form pipe open
# (no shell involved), which needs Perl 5.8 or newer; the "Requirements"
# entry in the original header above predates that change.

use 5.008;
use strict;
use warnings;

############# Modify these parameters according to your local site setup #####
#
# the lynx browser path
my $lynx = "/lusr/bin/lynx";

# how do you invoke this script?
# (on my host, web2news.pl was renamed web2news.cgi)
my $cgipath = "http://www.cs.utexas.edu/users/yschoe/cgi-bin/web2news.cgi";

# color preferences (for article)
my $titlebarcolor    = "#eeffff";    # light cyan
my $headerfieldcolor = "pink";
my $headervaluecolor = "#ffeeee";    # light pink
my $textbgcolor      = "#eeeeff";    # light blue
my $subjcolor        = "red";
my $nonsubjcolor     = "blue";

# table width (for articles)
my $width = 700;

# print full header info?
my $fullheader = 0;    # display - 0: partial header, 1: full header
#
#################### End of user configurable area ###########################

# some common definitions
my $version = "0.2";    # keep in step with the "Version:" line in the header
my $srcurl  = "http://www.cs.utexas.edu/users/yschoe/src/web2news.pl";
my $trailer = "Script written by Yoonsuck Choe"
    . "&lt<a href=\"mailto:yschoe\@cs.utexas.edu\"> yschoe\@cs.utexas.edu</a>&gt\n"
    . "<br> <i> Powered by "
    . "<a href=\"http://lynx.browser.org/\">Lynx"
    . "</a> and <a href=\"http://www.perl.org\">Perl</a></i>"
    . " (<a href=\"$srcurl\">script source</a>)"
    . "</body></html>";

# Characters that are refused in the query unless preceded by a backslash.
# Links emitted by the listing branch below escape '$' this way, so the
# backslash convention has to keep working for recursive requests.
my $unescaped_meta = qr/[^\\][&;`'"|*?~<>^()\[\]{}\$\n\r]/;

# Headers shown when the full header display is off.  Anchored so that only
# these exact field names qualify.
my $important_header = qr/\A(?:From|Newsgroups|Subject|Date|Organization|Distribution)\z/;

# html_escape($text): return $text with the characters that are special in
# HTML text and double-quoted attributes replaced by entities.  undef is
# treated as the empty string.
sub html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# strip_backslash_escapes($text): turn each "\x" pair into "x".  The link
# used to be handed to a shell, which performed this unquoting; lynx is now
# executed directly, so it is done here instead.
sub strip_backslash_escapes {
    my ($text) = @_;
    $text =~ s/\\(.)/$1/gs;
    return $text;
}

# fail_page($message_html): print an error message (already valid HTML)
# inside <pre>, followed by the rule and the trailer, then end the request.
# Does not return.
sub fail_page {
    my ($message_html) = @_;
    print "<pre>";
    print $message_html;
    print "</pre>";
    print "<hr>";
    print $trailer;
    exit 0;
}

# Print out HTML header
print "Content-type: text/html\n";
print "\n";

# Print out Title
print "<html>";
print "<body bgcolor=white>";
print "<table width=$width> <tr> <td bgcolor=$titlebarcolor>";
print "<h1 align=center> Web to Newsgroup Gateway (v$version) </h1>\n";
print "</td></tr></table>\n";

# Get the query string : news article or newsgroup
my $query_string = defined $ENV{'QUERY_STRING'} ? $ENV{'QUERY_STRING'} : '';
my @arguments    = split(/&/, $query_string);
my ($field, $link);

if (@arguments == 0) {
    # no args -- error (report once and stop instead of falling through)
    fail_page("web2news: no query argument given.\n");
}
elsif (@arguments == 1) {
    # one arg : <URL>.  Limit the split so a '=' inside the link survives.
    ($field, $link) = split(/=/, $arguments[0], 2);
}
elsif (@arguments == 2) {
    # two args : <fullheader> <URL>
    my ($opt_name, $opt_value) = split(/=/, $arguments[0], 2);
    if (defined $opt_name && $opt_name eq "fullheader") {
        $fullheader = $opt_value;
    }    # else, ignore
    ($field, $link) = split(/=/, $arguments[1], 2);
}
$field = '' unless defined $field;
$link  = '' unless defined $link;

# Check for suspicious arguments
if ($link =~ $unescaped_meta || $field =~ $unescaped_meta) {
    fail_page("web2news: illegal character in the cgi query.\n");
}

if ($field ne "web2newslink") {
    fail_page("web2news: query argument error.\n");
}

# Check for malicious requests: only news:// URLs are ever given to lynx
if ($link !~ /^news:\/\//) {
    fail_page("web2news: query argument error.\n");
}

# The link as it may safely appear in HTML text or an attribute value.
my $link_html = html_escape($link);

# Get the newsgroup listing with 'lynx' text browser in source retrieval
# mode.  The list form of open executes lynx directly, so nothing in the
# link is ever interpreted by a shell.
my @bigchunk;
if (open(my $lynx_out, '-|', $lynx, '-source', strip_backslash_escapes($link))) {
    @bigchunk = <$lynx_out>;
    close($lynx_out);
}
else {
    fail_page("web2news: unable to run the lynx browser.\n");
}

# Fewer than three lines (including no output at all) cannot be a listing
# or an article; the headline test below needs the first three lines.
if ($#bigchunk <= 1) {
    fail_page(" web2news : no such newsgroup or article."
            . "\n <i>$link_html</i>. ");
}

# check out the headline and determine if it is a article listing or
# an actual article.
my $headline = join('', @bigchunk[0 .. 2]);

# This is the main part
my $serverroot = 0;

if ($headline =~ /<TITLE>Newsgroup/) {
    # (1) if it is an article listing, just dump it
    if ($headline =~ /<TITLE>Newsgroups<\/TITLE>/) {
        $serverroot = 1;
    }
    for my $line (@bigchunk) {
        # lines carrying newspost:// links are dropped
        next if $line =~ /HREF="newspost:\/\//;

        # modify URL so that web2news can be used recursively
        $line =~ s/HREF="/HREF="${cgipath}?web2newslink=/g;
        # backslash-escape '$' so the link passes the query check above
        $line =~ s/\$/\\\$/g;
        print $line;
    }
    print "<hr>";
}
else {
    # (2) If it is an article, print it out with a little bit of formatting.
    print "<h3> article </h3>";
    my $endheader = 0;

    # give full header display option
    if (!$fullheader) {
        print "/<a href=\"$cgipath?fullheader=1"
            . "&web2newslink=$link_html\">Show Full Header</a>/";
    }

    print "<table border=0 width=$width>";

    # Index loop: a header may consume the following line as its value.
    for (my $i = 0; $i <= $#bigchunk; ++$i) {
        my $line = $bigchunk[$i];

        # skip the pre/XMP wrapper lines found in the lynx output
        next if $line =~ /pre>/ || $line =~ /XMP>/;

        if ($endheader) {
            # (i) it is not a header: body text, escaped for display in <pre>
            print html_escape($line);
            next;
        }

        # (ii) it is a header
        $line =~ s/\r?\n\z//;
        my ($name, $value) = split(/:/, $line, 2);
        $name  = '' unless defined $name;
        $value = '' unless defined $value;

        # some newsreaders put the values on different lines : especially
        # if it is looong.  Never take the blank separator line as a value,
        # otherwise the end of the header would be missed.
        if (   $name ne ''
            && $value eq ''
            && $i < $#bigchunk
            && $bigchunk[ $i + 1 ] !~ /\A\r?\n?\z/)
        {
            $value = $bigchunk[ ++$i ];
            $value =~ s/\r?\n\z//;
        }

        # Test for the end of the header.
        # Head and body are separated by a single blank line.
        if ($name eq '') {
            $endheader = 1;
            print "</table>";
            print "<hr>";
            print "<table border=0 width=$width>"
                . "<tr bgcolor=$textbgcolor><td>";
            print "<pre>";
            next;
        }

        # now only print out important headers (if $fullheader
        # is not set to 1).
        next if !$fullheader && $name !~ $important_header;

        # highlight the subject
        my $color = $name eq "Subject" ? $subjcolor : $nonsubjcolor;

        # Header text comes from the article, i.e. from an untrusted
        # poster: escape both the field name and its value.
        my $name_html  = html_escape($name);
        my $value_html = html_escape($value);

        # now print it out
        print "<tr><td bgcolor=$headerfieldcolor>"
            . "<b>$name_html</b>:</td> ";
        print "<td bgcolor=$headervaluecolor><i>"
            . "<font color=$color>$value_html</font>";
        print "</i></td></tr>\n";
    }

    # end of article -- tidy up: close whatever was opened above
    if ($endheader) {
        print "</pre>";
        print "</td></tr>";
    }
    print "</table>\n";
}

# print out trailer and exit
if ($serverroot) {
    print "\n";
}
print $trailer;