#!/usr/local/bin/perl
#	wwget
#        World Wide Getter(?)
#           by Yoshioka Tsuneo(tsuneo-y@is.aist-nara.ac.jp)
#                             (QWF00133@nifty.ne.jp)
#      Copy, Edit, Distribute FREE !!     
#
# support Proxy Server
# support GET,POST,HEAD method
# Only Support HTTP Protocol
#
#	You need client.pl
#       You need perl version 5 or later
#

#print STDERR "starting wwget!\n";
use Socket;

$prog=__FILE__;$progdir=$prog;$progdir=~s|/[^/]*$||;push(@INC,$progdir);
#print "prog=[$prog],progdir=[$progdir]\n";
require 'tcpclient.pl';

local($default_proxy,$public_delegate_server);
local($default_limit_time,$default_limit_size);

# If you need proxy server,leave # and set server,port.
#	$default_proxy="http://server:port/";
# $default_proxy="http://wwwproxy:8080/";
# $public_delegate_server="http://ringer.etl.go.jp/";
# $default_limit_size=50;
# $default_limit_time=60*5;


sub parse_URL
{
    local($URL)=@_;

    local($proto,$server,$port,$path);
    local($post);

    if($URL !~ /^(\w+):/){
	print "URL [$URL] isn't correct.\n";
	return undef;
    }
    $post = $';
    $proto=$1;$proto =~ tr/A-Z/a-z/;
    if($proto eq "http"){
	if(! ($post=~ m|^//([^/:]+)(:(\d+))?(/.*)?$|)){
	    print "<h3>URL [$URL] isn't correct</H3><p>\n";
	    return undef;
	}
	$server=$1;$port=($3 || 80);$path= ( $4 || "/");
    }elsif($proto eq "ftp"){
	if(! ($post=~ m!^//([^/:]+|.*:.*\@[^/:]+)(:(\d+))?(/.*)?$!)){
	    print "<h3>URL [$URL] isn't correct</H3><p>\n";
	    return undef;
	}
	$server=$1; $port=( $3 || 21) ;$path=($4 || "/");
    }elsif($proto eq "file"){
	if(! ($post=~ m|^/+(.*)?$|)){
	    print "<h3>URL [$URL] isn't correct</H3><p>\n";exit 1;
	}
	$path= ( $1 || "/");
    }else{
	print "Error parse_URL():Protocol [$proto] not supported\n";
	return undef;
    }
    # print "proto=$proto,server=$server,port=$port,path=$path\n";
    return ($proto,$server,$port,$path);
}
sub nonblock_readline
{
    local(*S,*buf,$timeout) = @_;
    local($rout);
    local($nfound,$timeleft);
    local($rin) = '';
    local($pos) = 0;
    local($ret);
    local($len)=1;

    vec($rin,fileno(S),1)=1;
    while(1){
	($nfound,$timeleft) = select($rout = $rin,undef,undef,$timeout);
	if($nfound==0){return -1;}
	# ($ret = ioctl(fileno(S),FIONREAD,$len)) || $ret = -1;
	read(S,$c,$len);
	substr($buf,$pos,$len) = $c;
	if($c eq '\n' || $c eq ''){return $pos;}
	$pos ++;
    }
}
sub getbody
{
    local(*S)=@_;
    local($i);
    
    $i=0;$|=1;
    while(read(S,$_,4096)>0){
	$i+=4;
	if(($limit_size ne "") && ($i>$limit_size)){
	    print "<h1>warning: "
		. "this file is over limit ($limit_size bytes)..</h1>\n";
	    return;
	}
	print $_;
    }
}
sub getbody2
{
    local(*S)=@_;
    while(<S>){
	s/\n//g;s/\r//g;
	if(/^$/){
	    last;
	}
    }
    &getbody(*S);
}
sub setheader
{
    local($TAG,$level)=@_;
    local($header,$body);
    if($TAG eq 'GET' || $TAG eq 'HEAD'){
	if($level eq ""){
	    $header="";$body="";
	}else{
	    while(<STDIN>){
		chop;
		$header.= $_ . "\n";
		if(/^$/){last;}
	    }
	    $body="";
	}
    }elsif($TAG eq 'POST'){
	if($level eq ""){
	    while(<STDIN>){
		$body.=$_;
	    }
	    $header.= "Content-length: " . length($body) . "\n";
	}else{
	    local($len)=0;
	    while(<STDIN>){
		chop;
		if(/^$/){last;}
		$header.=$header . "\n";
		if(/^Content-length: (\d+)/){$len=$1;}
	    }
	    if($len!=0){
		read(STDIN,$body,$len);
	    }
	}
    }else{
	print "[wwget error]setheader($TAG,$level)\n";
    }
    # print "setheader($TAG,$level) end\n";
    ($header,$body);
}
sub wwget_usage
{
    local($prog)=`basename $0`;chop($prog);
    print STDERR <<"_EOT_";
usage: $prog [<options>] [GET|POST|HEAD] <URL> [HTTP/<level>]
  option: -s<size> :Limit Size to get(KB) 
                    (default $default_limit_size(kb))
	  -t<time> :Limit Time to get(sec)
                    (default $default_limit_time(sec))
          -p<proxy server URL> :ProxyServer Hostname(or setenv http_proxy)
	  -noproxy :force not to use proxy server
	  -nocache :force not to use cache
(example) $prog http://www.osaka-u.ac.jp/
	  $prog -s50000 -p'http://wwwproxy:8080/' http://www.osaka-u.ac.jp/
          $prog http://www.rd.ecip.osaka-u.ac.jp:8080/ HTTP/1.0
                        and enter Entity Header and Return key
          $prog POST http://hogehoge/cgi-bin/foo.cgi
                         and enter Entity Body and Return Key
_EOT_
}
sub alarm_handler
{
    local($sig)=@_;
    print "<H1>time out via $0 (signal-$sig)</H1>\n";
    close(S);
    exit(0);
}
sub connect_http{
    local(*S,$path,$TAG,$level,$header,$body)=@_;
    # print "connect_http(S,$path,$TAG,$level,$header,$body)\n";
    if(($level eq "") && ($header ne "")){$level="HTTP/1.0";}
    if($level ne ""){
	print  S "$TAG $path $level\n";
	print  S $header;
	print  S  "\n";
	if($body ne ""){print S $body,"\n";}
    }else{
	print  S "$TAG $path HTTP/1.0\r\n";
	print  S $header;
	print  S "\r\n";
    }
    # flush buffer
    local($oldfh)=select(S);$|=1;select($oldfh);
    print S "";
}

#
#  wwget function
#
sub wwget
{
    local(*S,$proxy,$TAG,$URL,$level,$header,$body)=@_;
    local($proto,$server,$port,$path);
    
    # print STDERR "(proxy=$proxy,TAG=$TAG,URL=$URL,level=$level,header=$header)\n";
    if($proxy eq ""){
	$proxy=$default_proxy;
    	if($ENV{'http_proxy'} ne ""){$proxy=$ENV{'http_proxy'};}
    }
    ($proto,$server,$port,$path)=&parse_URL($URL);
    if($proto eq ""){return -1;}
    if($proto eq "file"){
	print "printing $path\n";
	open(S,"<$path")||die "$!: can't open $path";
	#print <FILEFH>;
	#close(FILEFH);
	return 0;
    }
	#if($proto =~ /^(ftp|gopher|wais)$/){
	#	#$URL="http://www.osaka-u.ac.jp:8080/-\%5F-$URL";
	#	$URL="${public_delegate_server}-\%5F-$URL";
	#	($proto,$server,$port,$path)=&parse_URL($URL);
	#}
    #if((  ((gethostbyname("$server"))[0])  eq "") && ($proxy ne "")){
    if($proxy ne ""){
	($proto,$server,$port,$path)=&parse_URL($proxy);
	if($proto eq ""){return -1;}
	$path=$URL;
    }
    # print "server is $server,port=$port,path=$path,proto=$proto,tag=$TAG\n";

    if (&tcp_setup_client(*S,$server,$port)==-1){
	print STDERR "Error: tcp_setup_client($server,$port):$err_msg\n";
	return -1;
    }

    if($proto eq "http"){
	# print "connecting..\n";
	&connect_http(*S,$path,$TAG,$level,$header,$body);
	# print "connected\n";
    }else{
	print STDERR "[wwget:wwget]$0:Protocol [ $proto ] isn't supported\n";
	close(S);
	return -1;
    }
    return 0;
}	
#------------main-----------------------------
if($0 eq __FILE__){
    local($limit_size,$limit_time,$TAG,$level,$proxy);
    local($URL,$nocache,$noproxy);
    local(*S);

    $limit_size=$default_limit_size;$limit_time=$default_limit_time;
    $TAG="GET";$level="";$nocache=0;

    while($_=$ARGV[0]){
	if(/^-s(\d+)$/){
	    $limit_size=$1;
	}elsif(/^-t(\d+)$/){
	    $limit_time=$1;
	}elsif(/^-p(.*)$/){
	    $proxy=$1;
	}elsif(/^-nocache$/){
	    $nocache=1;
	}elsif(/^-noproxy$/){
	    $noproxy=1;
	}elsif(/^-/){
	    &wwget_usage;
	    exit;
	}elsif(/^(GET|POST|HEAD)$/){
	    $TAG=$1;
	}elsif(m|^HTTP/.+$|){
	    $level=$_;
	}else{
	    $URL=$_;
	}
	shift;
    }
    if($URL eq ""){
	&wwget_usage;
	exit 1;
    }
    if($limit_time ne ""){
		$SIG{'ALRM'}='alarm_handler';
		alarm($limit_time);
    }

    ($header,$body)=&setheader($TAG,$level);
    if($nocache){$header.="Pragma: no-cache\n";}
    if(wwget(*S,$proxy,$TAG,$URL,$level,$header,$body)==-1){
	print "wwget error\n";
	exit 1;
    }
    if ($level ne ""){
    	&getbody(*S);  
    }else{
	&getbody2(*S);
    }
    close(S);
}

1;
