--- wwgetall-3.21/wwgetall.pl Thu Oct 29 06:40:13 1998 +++ wwgetall-new/wwgetall.pl Fri Mar 05 21:31:09 1999 @@ -653,22 +653,23 @@ $lostline =""; } $line =~ s/\s+/ /g; - $line =~ s/<\s+//>/g; $line =~ s/\s*=\s*/=/g; $url = ''; #print "line=[$line],lostline=[$lostline]\n"; # print "line = [$line]\n"; - if($line =~ m/]*\s)?href\=(\"([^\"]*)\"|([^\"]\S+))(\s+[^>]*)?>/i){ #" - $url = $3 || $4; + if($line =~ m/]*[ "'])?href=((["'])(.*?)\3|(\S*))([^>]*)?>/i){ #" + $url = $4 || $5; # print "matched url=[$url]\n"; - }elsif($line =~ m/]*\s)?src=(\"([^\"]*)\"|([^\"]\S+))(\s+[^>]*)?>/i){ #" - $url = $3 || $4; + }elsif($line =~ m/]*[ "'])?src=((["'])(.*?)\3|(\S*))([^>]*)?>/i){ #" + $url = $4 || $5; # print "IMG match=[$url]\n"; - }elsif($line =~ m/]*\s)?src=(\"([^\"]*)\"|([^\"]\S+))(\s+[^>]*)?>/i){ #" - $url = $3 || $4; - }elsif($line =~ m/]*\s)?background=(\"([^\"]*)\"|([^\"]\S+))(\s+[^>]*)?>/i){ #" - $url = $3 || $4; + }elsif($line =~ m/]*[ "'])?src=((["'])(.*?)\3|(\S*))([^>]*)?>/i){ #" + $url = $4 || $5; + }elsif($line =~ m/]*[ "'])?background=((["'])(.*?)\3|(\S*))([^>]*)?>/i){ #" + $url = $4 || $5; # print "Backgournd match = [$url]\n" } $url =~ s|\#.*$||; # erase label --- wwgetall-3.21/abs2rel.pl Thu Oct 29 06:40:15 1998 +++ wwgetall-new/abs2rel.pl Thu Mar 04 18:15:35 1999 @@ -40,15 +40,15 @@ sub abs2rel_url { local($currenturl,$url) = @_; -# local($urlhead,$urlbody); + local($urlhead,$urlbody); local($currenturl0,$url0) = ($currenturl,$url); local($host_root_dir,$host_dir); $currenturl =~ s|[^/]*$||; if($currenturl =~ m|^(http://[^/]+)(.*)|){ -# $urlhead = $1; -# $urlbody = $2; + $urlhead = $1; + $urlbody = $2; $host_root_dir = ("../" x (($urlbody =~ tr|/|/|)-1)); $host_dir = "../" . $host_root_dir; } @@ -81,25 +81,20 @@ print "INFO: $file is too large. skipped \n"; return; } - link $file,"$file.bak" || die "can't link [$file] to [$file.bak]:$!"; - open(FH_R,"$file.bak") || die "ERROR: can't open read file[$file.bak]:$!"; +# link $file,"$file.bak" || die "can't link [$file] to [$file.bak]:$!"; + open(FH_R,"$file") || die "ERROR: can't open read file[$file.bak]:$!"; $fromfile = join('',); close(FH_R); &abs2rel_str($currenturl,*fromfile); - open(FH_W,">$file.new") || die "ERROR: can't open write file[$file.new]:$!"; + if($backup){ + rename($file, "$file.bak") || die "can't rename [$file] to [$file.bak]:$!"; + } + open(FH_W,">$file") || die "ERROR: can't open write file[$file.new]:$!"; print FH_W $fromfile; close(FH_W); - print "cwd=[",`pwd`,"]\n"; - print "=========$file.new created=====\n"; +# print "cwd=[",`pwd`,"]\n"; + print "=========$file modified=====\n"; # print "FROMFILE=($fromfile)\n"; - if(! $testmode){ - unlink $file; - link "$file.new",$file || die "can't link [$file.new] to [$file]:$!"; - unlink "$file.new"; - } - if(! $backup){ - unlink "$file.bak" || die "can't unlink [$file.bak]:$!"; - } } sub abs2rel_dir {